com.moz.fiji.schema.impl.hbase.HBaseFijiTable.java Source code

Java tutorial

Introduction

Here is the source code for com.moz.fiji.schema.impl.hbase.HBaseFijiTable.java

Source

/**
 * (c) Copyright 2012 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.schema.impl.hbase;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;

import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.moz.fiji.annotations.ApiAudience;
import com.moz.fiji.commons.ResourceTracker;
import com.moz.fiji.schema.EntityId;
import com.moz.fiji.schema.EntityIdFactory;
import com.moz.fiji.schema.InternalFijiError;
import com.moz.fiji.schema.Fiji;
import com.moz.fiji.schema.FijiIOException;
import com.moz.fiji.schema.FijiReaderFactory;
import com.moz.fiji.schema.FijiRegion;
import com.moz.fiji.schema.FijiTable;
import com.moz.fiji.schema.FijiTableAnnotator;
import com.moz.fiji.schema.FijiTableNotFoundException;
import com.moz.fiji.schema.FijiTableReader;
import com.moz.fiji.schema.FijiTableWriter;
import com.moz.fiji.schema.FijiURI;
import com.moz.fiji.schema.FijiWriterFactory;
import com.moz.fiji.schema.avro.RowKeyFormat;
import com.moz.fiji.schema.avro.RowKeyFormat2;
import com.moz.fiji.schema.hbase.HBaseFactory;
import com.moz.fiji.schema.hbase.FijiManagedHBaseTableName;
import com.moz.fiji.schema.impl.HTableInterfaceFactory;
import com.moz.fiji.schema.impl.LayoutConsumer;
import com.moz.fiji.schema.impl.LayoutConsumer.Registration;
import com.moz.fiji.schema.layout.HBaseColumnNameTranslator;
import com.moz.fiji.schema.layout.FijiTableLayout;
import com.moz.fiji.schema.layout.impl.TableLayoutMonitor;
import com.moz.fiji.schema.util.ResourceUtils;
import com.moz.fiji.schema.util.VersionInfo;

/**
 * <p>A FijiTable that exposes the underlying HBase implementation.</p>
 *
 * <p>Within the internal Fiji code, we use this class so that we have
 * access to the HTable interface.  Methods that Fiji clients should
 * have access to should be added to com.moz.fiji.schema.FijiTable.</p>
 */
@ApiAudience.Private
public final class HBaseFijiTable implements FijiTable {
    private static final Logger LOG = LoggerFactory.getLogger(HBaseFijiTable.class);

    /** String identifying the scheme of a URI to an HDFS path. */
    private static final String HDFS_SCHEME = "hdfs";

    /** String required for requesting an HDFS delegation token. */
    private static final String RENEWER = "renewer";

    /** The fiji instance this table belongs to. */
    private final HBaseFiji mFiji;

    /** The name of this table (the Fiji name, not the HBase name). */
    private final String mName;

    /** URI of this table. */
    private final FijiURI mTableURI;

    /** States of a fiji table instance. */
    private static enum State {
        /**
         * Initialization begun but not completed.  Retain counter and ResourceTracker counters
         * have not been incremented yet.
         */
        UNINITIALIZED,
        /**
         * Finished initialization.  Both retain counters and ResourceTracker counters have been
         * incremented.  Resources are successfully opened and this HBaseFijiTable's methods may be
         * used.
         */
        OPEN,
        /**
         * Closed.  Other methods are no longer supported.  Resources and connections have been closed.
         */
        CLOSED
    }

    /** Tracks the state of this fiji table. */
    private final AtomicReference<State> mState = new AtomicReference<State>(State.UNINITIALIZED);

    /** HTableInterfaceFactory for creating new HTables associated with this FijiTable. */
    private final HTableInterfaceFactory mHTableFactory;

    /** The factory for EntityIds. */
    private final EntityIdFactory mEntityIdFactory;

    /** Retain counter. When decreased to 0, the HBase FijiTable may be closed and disposed of. */
    private final AtomicInteger mRetainCount = new AtomicInteger(0);

    /** Configuration object for new HTables. */
    private final Configuration mConf;

    /** Writer factory for this table. */
    private final FijiWriterFactory mWriterFactory;

    /** Reader factory for this table. */
    private final FijiReaderFactory mReaderFactory;

    /** HConnection used for creating lightweight tables. Should be closed by us. */
    private final HConnection mHConnection;

    /** Name of the HBase table backing this Fiji table. */
    private final String mHBaseTableName;

    /** Monitor for the layout of this table. Closed when this table releases its resources. */
    private final TableLayoutMonitor mLayoutMonitor;

    /**
     * Construct an opened Fiji table stored in HBase.
     *
     * <p>The Fiji instance is retained as the first step; the matching release happens in
     * {@code closeResources()}.  On success the table ends up in state {@code OPEN} with a retain
     * count of 1 and is registered with the {@link ResourceTracker}.</p>
     *
     * @param fiji The Fiji instance.
     * @param name The name of the Fiji user-space table to open.
     * @param conf The Hadoop configuration object.
     * @param htableFactory A factory that creates HTable objects.
     * @param layoutMonitor a valid TableLayoutMonitor for this table.
     * @throws IOException On an HBase error.
     * @throws FijiTableNotFoundException if the table does not exist.
     */
    HBaseFijiTable(HBaseFiji fiji, String name, Configuration conf, HTableInterfaceFactory htableFactory,
            TableLayoutMonitor layoutMonitor) throws IOException {
        mFiji = fiji;
        // Retained up front so that closeResources() can unconditionally release it.
        mFiji.retain();

        mName = name;
        mHTableFactory = htableFactory;
        mConf = conf;
        mTableURI = FijiURI.newBuilder(mFiji.getURI()).withTableName(mName).build();
        LOG.debug("Opening Fiji table '{}' with client version '{}'.", mTableURI, VersionInfo.getSoftwareVersion());
        mHBaseTableName = FijiManagedHBaseTableName.getFijiTableName(mTableURI.getInstance(), mName).toString();

        if (!mFiji.getTableNames().contains(mName)) {
            // State is still UNINITIALIZED here, and mHConnection / mLayoutMonitor have not been
            // assigned yet, so closeResources() sees them as null (presumably tolerated by
            // ResourceUtils.closeOrLog -- TODO confirm) and effectively only releases mFiji.
            // NOTE(review): the layoutMonitor argument is not closed on this path -- confirm that
            // the caller retains ownership of it when construction fails.
            closeResources();
            throw new FijiTableNotFoundException(mTableURI);
        }

        mWriterFactory = new HBaseFijiWriterFactory(this);
        mReaderFactory = new HBaseFijiReaderFactory(this);

        mLayoutMonitor = layoutMonitor;
        // The entity ID factory is derived once, from the layout current at construction time.
        mEntityIdFactory = createEntityIdFactory(mLayoutMonitor.getLayout());

        // NOTE(review): if this call (or any step after the existence check) throws, nothing
        // releases the retain taken on mFiji above -- confirm whether that leak matters.
        mHConnection = HBaseFactory.Provider.get().getHConnection(mFiji);

        // Table is now open and must be released properly:
        mRetainCount.set(1);

        final State oldState = mState.getAndSet(State.OPEN);
        Preconditions.checkState(oldState == State.UNINITIALIZED, "Cannot open FijiTable instance in state %s.",
                oldState);
        ResourceTracker.get().registerResource(this);
    }

    /**
     * Builds the entity ID factory matching the row key format declared by a table layout.
     *
     * @param layout layout whose row key format selects the entity ID factory.
     * @return a new entity ID factory for the layout's row key format.
     * @throws RuntimeException if the row key format is neither a {@link RowKeyFormat} nor a
     *     {@link RowKeyFormat2}.
     */
    private static EntityIdFactory createEntityIdFactory(final FijiTableLayout layout) {
        final Object keysFormat = layout.getDesc().getKeysFormat();
        if (keysFormat instanceof RowKeyFormat) {
            return EntityIdFactory.getFactory((RowKeyFormat) keysFormat);
        }
        if (keysFormat instanceof RowKeyFormat2) {
            return EntityIdFactory.getFactory((RowKeyFormat2) keysFormat);
        }
        // Neither known format: the layout descriptor is not something this client understands.
        throw new RuntimeException("Invalid Row Key format found in Fiji Table: " + keysFormat);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Delegates to the entity ID factory that was built from the table layout when this table
     * was constructed.  The table state is not checked.</p>
     */
    @Override
    public EntityId getEntityId(Object... fijiRowKey) {
        return mEntityIdFactory.getEntityId(fijiRowKey);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns the instance this table was constructed with; no additional retain is taken on
     * behalf of the caller.</p>
     */
    @Override
    public Fiji getFiji() {
        return mFiji;
    }

    /**
     * {@inheritDoc}
     *
     * <p>This is the Fiji table name, not the name of the backing HBase table.</p>
     */
    @Override
    public String getName() {
        return mName;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The URI is built once at construction from the Fiji instance URI and the table name.</p>
     */
    @Override
    public FijiURI getURI() {
        return mTableURI;
    }

    /**
     * Registers a layout consumer with this table's layout monitor.  The consumer must be updated
     * before this table will report that it has completed a table layout update, and it receives
     * its first update immediately, before this method returns.
     *
     * @param consumer the LayoutConsumer to be registered.
     * @return a registration object which must be closed when layout updates are no longer needed.
     * @throws IOException in case of an error updating the LayoutConsumer.
     * @throws IllegalStateException if this table is not open.
     */
    public Registration registerLayoutConsumer(LayoutConsumer consumer) throws IOException {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen,
                "Cannot register a new layout consumer to a FijiTable in state %s.", current);
        final Registration registration = mLayoutMonitor.registerLayoutConsumer(consumer);
        return registration;
    }

    /**
     * Returns the TableLayoutMonitor associated with this HBaseFijiTable.
     *
     * @return the TableLayoutMonitor associated with this HBaseFijiTable.
     * @throws IllegalStateException if this table is not open.
     */
    public TableLayoutMonitor getTableLayoutMonitor() {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen,
                "Cannot get a table layout monitor from a FijiTable in state %s.", current);
        return mLayoutMonitor;
    }

    /**
     * Obtains an HTableInterface for the HBase table backing this Fiji table from the HConnection
     * held by this table.
     *
     * <p> The caller is responsible for properly closing the returned table afterwards. </p>
     *
     * @return An HTableInterface for the HBase table backing this FijiTable.
     * @throws IOException in case of an error.
     * @throws IllegalStateException if this table is not open.
     */
    public HTableInterface openHTableConnection() throws IOException {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen,
                "Cannot open an HTable connection for a FijiTable in state %s.", current);
        final HTableInterface htable = mHConnection.getTable(mHBaseTableName);
        return htable;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The layout is read from the layout monitor on every call.  If you need both the table
     * layout and a column name translator within a single short lived operation, you should create
     * the column name translator directly from the returned layout.</p>
     */
    @Override
    public FijiTableLayout getLayout() {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen, "Cannot get the layout of a table in state %s.", current);
        final FijiTableLayout layout = mLayoutMonitor.getLayout();
        return layout;
    }

    /**
     * Builds a column name translator from the current layout of this table.  Do not cache the
     * returned object.
     *
     * <p>If you need both the table layout and a column name translator within a single short
     * lived operation, you should use {@link #getLayout()} and create your own
     * {@link HBaseColumnNameTranslator} to ensure consistent state.</p>
     *
     * @return the column name translator for the current layout of this table.
     * @throws IllegalStateException if this table is not open.
     */
    public HBaseColumnNameTranslator getColumnNameTranslator() {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen,
                "Cannot get the column name translator of a table in state %s.", current);
        // getLayout() performs its own state check before reading the layout monitor.
        final FijiTableLayout layout = getLayout();
        return HBaseColumnNameTranslator.from(layout);
    }

    /**
     * {@inheritDoc}
     *
     * <p>An {@link IOException} raised while creating the reader is rethrown wrapped in a
     * {@link FijiIOException}.</p>
     */
    @Override
    public FijiTableReader openTableReader() {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen, "Cannot open a table reader on a FijiTable in state %s.", current);

        final FijiTableReader reader;
        try {
            reader = HBaseFijiTableReader.create(this);
        } catch (IOException cause) {
            throw new FijiIOException(cause);
        }
        return reader;
    }

    /**
     * {@inheritDoc}
     *
     * <p>An {@link IOException} raised while creating the writer is rethrown wrapped in a
     * {@link FijiIOException}.</p>
     */
    @Override
    public FijiTableWriter openTableWriter() {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen, "Cannot open a table writer on a FijiTable in state %s.", current);

        final FijiTableWriter writer;
        try {
            writer = new HBaseFijiTableWriter(this);
        } catch (IOException cause) {
            throw new FijiIOException(cause);
        }
        return writer;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns the single reader factory created together with this table.</p>
     */
    @Override
    public FijiReaderFactory getReaderFactory() throws IOException {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen, "Cannot get the reader factory for a FijiTable in state %s.", current);
        return mReaderFactory;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns the single writer factory created together with this table.</p>
     */
    @Override
    public FijiWriterFactory getWriterFactory() throws IOException {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen, "Cannot get the writer factory for a FijiTable in state %s.", current);
        return mWriterFactory;
    }

    /**
     * Lists the regions of the HBase table backing this Fiji table.
     *
     * <p>This method was copied from HFileOutputFormat of 0.90.1-cdh3u0 and modified to
     * return FijiRegion instead of ImmutableBytesWritable.</p>
     *
     * <p>Location information is attached to each region only when the table factory hands back
     * a concrete {@link HTable}; otherwise regions are returned without locations.</p>
     *
     * @return An ordered list of the table regions.
     * @throws IOException on I/O error.
     * @throws IllegalStateException if this table is not open.
     */
    @Override
    public List<FijiRegion> getRegions() throws IOException {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen, "Cannot get the regions for a FijiTable in state %s.", current);

        final HBaseAdmin admin = mFiji.getHBaseAdmin();
        final HTableInterface table = mHTableFactory.create(mConf, mHBaseTableName);
        try {
            final List<HRegionInfo> regionInfos = admin.getTableRegions(table.getTableName());
            final List<FijiRegion> fijiRegions = Lists.newArrayList();

            if (!(table instanceof HTable)) {
                // Without the concrete HTable there is no way to look up region locations.
                LOG.warn("Unable to cast HTableInterface {} to an HTable.  "
                        + "Creating Fiji regions without location info.", getURI());
                for (HRegionInfo regionInfo : regionInfos) {
                    fijiRegions.add(new HBaseFijiRegion(regionInfo));
                }
                return fijiRegions;
            }

            LOG.debug("Casting HTableInterface to an HTable.");
            final HTable concreteTable = (HTable) table;
            for (HRegionInfo regionInfo : regionInfos) {
                final List<HRegionLocation> locations =
                        concreteTable.getRegionsInRange(regionInfo.getStartKey(), regionInfo.getEndKey());
                fijiRegions.add(new HBaseFijiRegion(regionInfo, locations));
            }
            return fijiRegions;
        } finally {
            table.close();
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>Opens a short-lived HTable through the table factory, computes the partitions from it and
     * closes it again before returning.  The table produced by the factory must be a concrete
     * {@link HTable}; otherwise the cast below fails with a {@link ClassCastException}.</p>
     *
     * @throws IllegalStateException if this table is not open.
     */
    @Override
    public Collection<HBaseFijiPartition> getPartitions() throws IOException {
        // Same open-state precondition as the other table-level operations of this class.
        final State state = mState.get();
        Preconditions.checkState(state == State.OPEN,
                "Cannot get the partitions for a FijiTable in state %s.", state);
        try (HTableInterface htable = mHTableFactory.create(mConf, mHBaseTableName)) {
            return HBaseFijiPartition.getPartitions((HTable) htable);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>A new annotator is created on every call.</p>
     */
    @Override
    public FijiTableAnnotator openTableAnnotator() throws IOException {
        final State current = mState.get();
        final boolean isOpen = (State.OPEN == current);
        Preconditions.checkState(isOpen, "Cannot get the TableAnnotator for a table in state: %s.", current);
        final FijiTableAnnotator annotator = new HBaseFijiTableAnnotator(this);
        return annotator;
    }

    /**
     * Moves this table to the CLOSED state and releases the resources it holds: the HConnection,
     * the layout monitor and the retain on the Fiji instance.
     *
     * <p>May be called from state OPEN, or from state UNINITIALIZED when construction is aborted.
     * The ResourceTracker registration is only removed when the table had actually been opened.</p>
     *
     * @throws IOException on I/O error.
     * @throws IllegalStateException if this table was already closed.
     */
    private void closeResources() throws IOException {
        final State previous = mState.getAndSet(State.CLOSED);
        final boolean wasUninitialized = (State.UNINITIALIZED == previous);
        Preconditions.checkState(State.OPEN == previous || wasUninitialized,
                "Cannot close FijiTable instance %s in state %s.", this, previous);
        LOG.debug("Closing HBaseFijiTable '{}'.", this);

        ResourceUtils.closeOrLog(mHConnection);
        ResourceUtils.closeOrLog(mLayoutMonitor);
        ResourceUtils.releaseOrLog(mFiji);
        if (!wasUninitialized) {
            // Only a fully opened table was ever registered with the tracker.
            ResourceTracker.get().unregisterResource(this);
        }

        LOG.debug("HBaseFijiTable '{}' closed.", mTableURI);
    }

    /**
     * {@inheritDoc}
     *
     * <p>The retain counter is validated before it is modified: a failed attempt to retain a
     * closed table leaves the counter untouched, so a closed table can never appear retainable
     * again to a later caller.</p>
     *
     * @throws IllegalStateException if the retain counter is below 1, i.e. the table is closed.
     */
    @Override
    public FijiTable retain() {
        // Validate-then-CAS loop: incrementing first would leave the counter at 1 after a failed
        // retain on a closed table, letting the next retain() succeed on a dead instance.
        while (true) {
            final int counter = mRetainCount.get();
            Preconditions.checkState(counter >= 1, "Cannot retain a closed FijiTable %s: retain counter was %s.",
                    mTableURI, counter);
            if (mRetainCount.compareAndSet(counter, counter + 1)) {
                return this;
            }
            // Lost a race with a concurrent retain/release: re-read and try again.
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>When the retain counter drops to 0 the resources of this table are closed.  The counter
     * is validated before it is modified, so releasing an already closed table fails without
     * driving the counter negative.</p>
     *
     * @throws IOException on I/O error while closing the table resources.
     * @throws IllegalStateException if the table has already been fully released.
     */
    @Override
    public void release() throws IOException {
        int updated;
        while (true) {
            final int current = mRetainCount.get();
            updated = current - 1;
            // The message reports the value the counter would have after this release.
            Preconditions.checkState(updated >= 0, "Cannot release closed FijiTable %s: retain counter is now %s.",
                    mTableURI, updated);
            if (mRetainCount.compareAndSet(current, updated)) {
                break;
            }
            // Lost a race with a concurrent retain/release: re-read and try again.
        }
        if (updated == 0) {
            closeResources();
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>Two tables are equal when they are of the same class and have the same table URI.</p>
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (null == obj || !getClass().equals(obj.getClass())) {
            return false;
        }
        // Same class: equality is decided by the table URI alone.
        final FijiURI otherURI = ((FijiTable) obj).getURI();
        return mTableURI.equals(otherURI);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Hashes on the table URI only, the same attribute {@code equals} compares.</p>
     */
    @Override
    public int hashCode() {
        return mTableURI.hashCode();
    }

    /**
     * {@inheritDoc}
     *
     * <p>Reports the identity hash, URI, retain counter, layout ID and state of this table.  The
     * layout ID is only looked up while the table is open and is reported as "unknown"
     * otherwise.</p>
     */
    @Override
    public String toString() {
        final String layoutId;
        if (mState.get() == State.OPEN) {
            layoutId = mLayoutMonitor.getLayout().getDesc().getLayoutId();
        } else {
            layoutId = "unknown";
        }
        return Objects.toStringHelper(HBaseFijiTable.class)
                .add("id", System.identityHashCode(this))
                .add("uri", mTableURI)
                .add("retain_counter", mRetainCount.get())
                .add("layout_id", layoutId)
                .add("state", mState.get())
                .toString();
    }

    /**
     * Convenience downcast from FijiTable to HBaseFijiTable, which is common within the internals
     * of Fiji code since all FijiTables are expected to really be HBaseFijiTable instances.
     *
     * @param fijiTable The Fiji table to downcast to an HBaseFijiTable.
     * @return The given Fiji table as an HBaseFijiTable.
     * @throws InternalFijiError if the given table is not an HBaseFijiTable.
     */
    public static HBaseFijiTable downcast(FijiTable fijiTable) {
        if (fijiTable instanceof HBaseFijiTable) {
            return (HBaseFijiTable) fijiTable;
        }
        // This should really never happen.  Something is seriously
        // wrong with Fiji code if we get here.
        throw new InternalFijiError("Found a FijiTable object that was not an instance of HBaseFijiTable.");
    }

    /**
     * Instantiates an HFile loader, converting any failure into an {@link InternalFijiError}.
     *
     * @param conf Configuration object for the HFile loader.
     * @return the new HFile loader.
     */
    private static LoadIncrementalHFiles createHFileLoader(Configuration conf) {
        final LoadIncrementalHFiles hfileLoader;
        try {
            // The constructor is declared to throw a plain Exception.
            hfileLoader = new LoadIncrementalHFiles(conf);
        } catch (Exception cause) {
            throw new InternalFijiError(cause);
        }
        return hfileLoader;
    }

    /**
     * Loads partitioned HFiles directly into the regions of this Fiji table.
     *
     * <p>Every entry directly under {@code hfilePath} whose name ends in {@code .hfile} and does
     * not start with an underscore is loaded; when no such entry exists, {@code hfilePath} itself
     * is handed to the loader.</p>
     *
     * <p>When Hadoop security is enabled and the path uses the HDFS scheme, a single HDFS
     * delegation token is requested up front and cancelled once loading has finished, whether or
     * not it succeeded.</p>
     *
     * @param hfilePath Path of the HFiles to load.
     * @throws IOException on I/O error.
     */
    public void bulkLoad(Path hfilePath) throws IOException {
        final LoadIncrementalHFiles loader = createHFileLoader(mConf);

        // May be null when the path carries no scheme, hence the constant-first comparison below.
        final String hFileScheme = hfilePath.toUri().getScheme();
        Token<DelegationTokenIdentifier> hdfsDelegationToken = null;

        // If we're bulk loading from a secure HDFS, we should request and forward a delegation token.
        // LoadIncrementalHfiles will actually do this if none is provided, but because we call it
        // repeatedly in a short amount of time, this seems to trigger a possible race condition
        // where we ask to load the next HFile while there is a pending token cancellation request.
        // By requesting the token ourselves, it is re-used for each bulk load call.
        // Once we're done with the bulk loader we cancel the token.
        if (UserGroupInformation.isSecurityEnabled() && HDFS_SCHEME.equals(hFileScheme)) {
            final UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
            final DistributedFileSystem fileSystem = (DistributedFileSystem) hfilePath.getFileSystem(mConf);
            hdfsDelegationToken = fileSystem.getDelegationToken(RENEWER);
            ugi.addToken(hdfsDelegationToken);
        }

        boolean loaded = false;
        try {
            loadHFiles(loader, hfilePath);
            loaded = true;
        } catch (TableNotFoundException tnfe) {
            throw new InternalFijiError(tnfe);
        } finally {
            // Cancel the HDFS delegation token if we requested one, even when the load failed.
            if (null != hdfsDelegationToken) {
                cancelDelegationToken(hdfsDelegationToken, loaded);
            }
        }
    }

    /**
     * Collects the HFiles found under a path and bulk loads each of them into the backing table.
     *
     * @param loader HFile loader to use.
     * @param hfilePath Directory of partitioned HFiles, or a path to load as-is.
     * @throws IOException on I/O error.
     */
    private void loadHFiles(LoadIncrementalHFiles loader, Path hfilePath) throws IOException {
        // The table is held as a resource before the cast so it is closed even if the cast fails.
        try (HTableInterface htableInterface = mHTableFactory.create(mConf, mHBaseTableName)) {
            // LoadIncrementalHFiles.doBulkLoad() requires an HTable instance, not an HTableInterface:
            final HTable htable = (HTable) htableInterface;
            final List<Path> hfilePaths = Lists.newArrayList();

            // Try to find any hfiles for partitions within the passed in path
            final FileStatus[] hfiles = hfilePath.getFileSystem(mConf).globStatus(new Path(hfilePath, "*"));
            if (null != hfiles) {
                for (FileStatus hfile : hfiles) {
                    final String partName = hfile.getPath().getName();
                    if (!partName.startsWith("_") && partName.endsWith(".hfile")) {
                        hfilePaths.add(new Path(hfilePath, partName));
                    }
                }
            }
            if (hfilePaths.isEmpty()) {
                // If we didn't find any parts, add in the passed in parameter
                hfilePaths.add(hfilePath);
            }
            for (Path path : hfilePaths) {
                loader.doBulkLoad(path, htable);
                LOG.info("Successfully loaded: {}", path);
            }
        }
    }

    /**
     * Cancels an HDFS delegation token previously requested for a bulk load.
     *
     * @param token The delegation token to cancel.
     * @param propagateFailure Whether an I/O failure should be rethrown; pass false when another
     *     exception is already propagating so that it is not masked by the cleanup failure.
     * @throws IOException if cancellation fails and {@code propagateFailure} is true.
     */
    private void cancelDelegationToken(Token<DelegationTokenIdentifier> token, boolean propagateFailure)
            throws IOException {
        try {
            token.cancel(mConf);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up the stack can observe it.
            Thread.currentThread().interrupt();
            LOG.warn("Failed to cancel HDFS delegation token.", e);
        } catch (IOException e) {
            if (propagateFailure) {
                throw e;
            }
            LOG.warn("Failed to cancel HDFS delegation token.", e);
        }
    }
}