org.archive.modules.recrawl.hbase.SingleHBaseTable.java Source code


Introduction

Here is the source code for org.archive.modules.recrawl.hbase.SingleHBaseTable.java

Source

/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.modules.recrawl.hbase;

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * simple HTable wrapper that shares a single instance of HTable among threads.
 * If you only perform Get on the HTable, this implementation
 * should be good enough. If multiple threads perform Put, {@link HBaseTable}
 * would be more efficient.
 * <p>when HBase I/O fails due to an issue with the network, a region server or
 * ZooKeeper, this class waits for a preset time (see {@link #setReconnectInterval(int)})
 * before trying to reestablish the HBase connection. During this hold-off period, all
 * {@link #get(Get)} and {@link #put(Put)} calls will fail.
 * 
 * @contributor kenji
 */
public class SingleHBaseTable extends HBaseTableBean {
    private static final Log LOG = LogFactory.getLog(SingleHBaseTable.class);

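    // single shared HTable instance; null while no connection is open.
    // tableError records the timestamp of the last connection error, and
    // tableUseLock guards closing and replacing the shared instance.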
    private HTableInterface table;
    private volatile long tableError;
    private ReentrantReadWriteLock tableUseLock = new ReentrantReadWriteLock();

    boolean autoReconnect = true;

    public boolean isAutoReconnect() {
        return autoReconnect;
    }

    /**
     * if set to {@code true}, HBaseClient tries to reconnect to the HBase master
     * immediately when a Put request fails due to connection loss (note that {@link #put(Put)}
     * still throws IOException even if autoReconnect is enabled).
     * @param autoReconnect true to enable auto-reconnect
     */
    public void setAutoReconnect(boolean autoReconnect) {
        this.autoReconnect = autoReconnect;
    }

    protected boolean autoFlush = true;

    /**
     * passed on to HTable's autoFlush property upon creation.
     * @return true for enabling auto-flush.
     */
    public boolean isAutoFlush() {
        return autoFlush;
    }

    public void setAutoFlush(boolean autoFlush) {
        this.autoFlush = autoFlush;
    }

    // default 3 minutes
    private int reconnectInterval = 1000 * 3 * 60;

    public int getReconnectInterval() {
        return reconnectInterval;
    }

    /**
     * set hold-off interval upon communication errors.
     * @param reconnectInterval hold-off interval in milliseconds.
     */
    public void setReconnectInterval(int reconnectInterval) {
        this.reconnectInterval = reconnectInterval;
    }

    // counters

    protected AtomicLong getCount = new AtomicLong();
    // count of GET/PUT failures (i.e. not counting connection failures).
    protected AtomicLong getErrorCount = new AtomicLong();
    protected AtomicLong getSkipCount = new AtomicLong();

    protected AtomicLong putCount = new AtomicLong();
    protected AtomicLong putErrorCount = new AtomicLong();
    protected AtomicLong putSkipCount = new AtomicLong();

    protected AtomicLong connectCount = new AtomicLong();

    public long getGetCount() {
        return getCount.get();
    }

    public long getGetErrorCount() {
        return getErrorCount.get();
    }

    public long getGetSkipCount() {
        return getSkipCount.get();
    }

    public long getPutCount() {
        return putCount.get();
    }

    public long getConnectCount() {
        return connectCount.get();
    }

    // for diagnosing deadlock situation
    public Map<String, Object> getTableLockState() {
        Map<String, Object> m = new LinkedHashMap<String, Object>();
        m.put("readLockCount", tableUseLock.getReadLockCount());
        m.put("queueLength", tableUseLock.getQueueLength());
        m.put("writeLocked", tableUseLock.isWriteLocked());
        return m;
    }

    public SingleHBaseTable() {
    }

    /**
     * attempts to reconnect to HBase if table is null.
     * must not be called with read-lock.
     * @return existing or newly opened HTableInterface.
     */
    protected HTableInterface getTable() {
        if (table == null && autoReconnect)
            openTable();
        return table;
    }

    /**
     * close HTable {@code htable}, setting tableError to the current time if closing
     * because of a communication error. should be called with write lock.
     * @param htable HTable to close.
     * @param byError true if closing because of an error.
     */
    protected void closeTable(HTableInterface htable, boolean byError) {
        if (htable == null)
            return;
        if (table != htable) {
            // other thread did closeTable on htable. don't close table.
            return;
        }
        try {
            table = null;
            htable.close();
        } catch (IOException ex) {
            LOG.warn("error closing " + htable + " - some commits may have been lost");
        }
        if (byError) {
            tableError = System.currentTimeMillis();
        }
    }

    public void put(Put p) throws IOException {
        putCount.incrementAndGet();
        // trigger reconnection if necessary. as table can be replaced before the
        // read lock is acquired, we don't use the value returned here; table is
        // re-read under the read lock below.
        getTable();
        boolean htableFailed = false;
        HTableInterface htable = null;
        Lock readLock = tableUseLock.readLock();
        try {
            if (!readLock.tryLock(TRY_READ_LOCK_TIMEOUT, TimeUnit.SECONDS)) {
                putSkipCount.incrementAndGet();
                throw new IOException("could not acquire read lock for HTable.");
            }
        } catch (InterruptedException ex) {
            throw new IOException("interrupted while acquiring read lock", ex);
        }
        try {
            htable = table;
            if (htable == null) {
                putSkipCount.incrementAndGet();
                throw new IOException("HBase connection is unavailable.");
            }
            // HTable.put() buffers Puts and access to the buffer is not
            // synchronized.
            synchronized (htable) {
                try {
                    htable.put(p);
                } catch (NullPointerException ex) {
                    // HTable.put() throws NullPointerException when connection is lost.
                    // It is somewhat weird, so translate it to IOException.
                    putErrorCount.incrementAndGet();
                    htableFailed = true;
                    throw new IOException("hbase connection is lost", ex);
                } catch (NotServingRegionException ex) {
                    putErrorCount.incrementAndGet();
                    // no need to close HTable.
                    throw ex;
                } catch (IOException ex) {
                    putErrorCount.incrementAndGet();
                    htableFailed = true;
                    throw ex;
                }
            }
        } finally {
            readLock.unlock();
            if (htableFailed) {
                closeTable(htable, true);
            }
        }
    }

    public Result get(Get g) throws IOException {
        getCount.incrementAndGet();
        // trigger reconnection if necessary. as table can be replaced before the
        // read lock is acquired, we don't use the value returned here; table is
        // re-read under the read lock below.
        getTable();
        boolean htableFailed = false;
        HTableInterface htable = null;
        Lock readLock = tableUseLock.readLock();
        try {
            if (!readLock.tryLock(TRY_READ_LOCK_TIMEOUT, TimeUnit.SECONDS)) {
                getSkipCount.incrementAndGet();
                throw new IOException("could not acquire read lock for HTable.");
            }
        } catch (InterruptedException ex) {
            throw new IOException("interrupted while acquiring read lock", ex);
        }
        try {
            htable = table;
            if (htable == null) {
                getSkipCount.incrementAndGet();
                throw new IOException("HBase connection is unavailable.");
            }
            try {
                return htable.get(g);
            } catch (NotServingRegionException ex) {
                // caused by disruption to HBase cluster. no need to
                // refresh HBase connection, since connection itself
                // is working okay.
                // TODO: should we need to back-off for a while? other
                // regions may still be accessible.
                getErrorCount.incrementAndGet();
                throw ex;
            } catch (IOException ex) {
                getErrorCount.incrementAndGet();
                htableFailed = true;
                throw ex;
            }
        } finally {
            readLock.unlock();
            if (htableFailed) {
                closeTable(htable, true);
            }
        }
    }

    @Override
    public HTableDescriptor getHtableDescriptor() throws IOException {
        HTableInterface table = getTable();
        if (table == null) {
            throw new IOException("HBase connection is unavailable.");
        }
        return table.getTableDescriptor();
    }

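    /**
     * whether we are still within the hold-off period following the last
     * connection error (see {@link #setReconnectInterval(int)}).
     */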
    public boolean inBackoffPeriod() {
        return (tableError > 0 && (System.currentTimeMillis() - tableError) < reconnectInterval);
    }

    /**
     * timestamp of the last Put/Get error.
     * @return timestamp in ms.
     */
    public long getTableErrorTime() {
        return tableError;
    }

    /**
     * connect to HBase.
     * it does nothing if table is already non-null, or if we are still in the
     * back-off period since the last error.
     * should be called with write lock.
     * @return true if the table is open (already or newly opened), false otherwise.
     */
    protected boolean openTable() {
        if (table != null)
            return true;
        // fail immediately if we're in back-off period.
        if (inBackoffPeriod())
            return false;
        try {
            HTable t = new HTable(hbase.configuration(), Bytes.toBytes(htableName));
            connectCount.incrementAndGet();
            t.setAutoFlush(autoFlush);
            table = t;
            tableError = 0;
            return true;
        } catch (TableNotFoundException ex) {
            // ex.getMessage() only has table name. be a little bit more friendly.
            LOG.warn("failed to connect to HTable \"" + htableName + "\": Table Not Found");
            tableError = System.currentTimeMillis();
            return false;
        } catch (IOException ex) {
            LOG.warn("failed to connect to HTable \"" + htableName + "\" (" + ex.getMessage() + ")");
            tableError = System.currentTimeMillis();
            return false;
        }
    }

    /**
     * number of seconds to wait for acquiring the read lock.
     * if the read lock is not acquired within this many seconds (probably
     * due to a deadlock on the write-lock side), {@link #get(Get)} and
     * {@link #put(Put)} will fail with an IOException.
     */
    public final static long TRY_READ_LOCK_TIMEOUT = 5;
    /**
     * number of seconds to wait for acquiring write lock.
     */
    public final static long TRY_WRITE_LOCK_TIMEOUT = 10;

    /**
     * close current connection and establish new connection.
     * fails silently if back-off period is in effect.
     */
    protected void reconnect(boolean onerror) throws IOException, InterruptedException {
        // avoid the deadlock caused by attempting to acquire the write lock
        // while holding the read lock. there'd be no real deadlock now that a
        // timeout on the write lock is implemented, but the warning is still
        // useful because it indicates a bug in the locking.
        if (tableUseLock.getReadHoldCount() > 0) {
            LOG.warn("avoiding deadlock: reconnect() called by thread with read lock.");
            return;
        }
        Lock writeLock = tableUseLock.writeLock();
        if (!writeLock.tryLock(TRY_WRITE_LOCK_TIMEOUT, TimeUnit.SECONDS)) {
            LOG.warn("reconnect() could not acquire write lock on tableUseLock for " + TRY_WRITE_LOCK_TIMEOUT
                    + "s, giving up.");
            return;
        }
        try {
            closeTable(table, onerror);
            openTable();
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * close current connection and establish new connection.
     * for refreshing stale connection through scripting.
     * resets tableErrorTime to zero (it will be set to non-zero if
     * reconnection attempt fails).
     * @throws IOException
     * @throws InterruptedException
     */
    public void reconnect() throws IOException, InterruptedException {
        tableError = 0;
        reconnect(false);
    }

    //    public boolean isRunning() {
    //        return table != null;
    //    }
    public void start() {
        super.start();
        openTable();
    }

    public void stop() {
        if (table != null) {
            try {
                table.close();
            } catch (IOException ex) {
                LOG.warn("table.close() failed", ex);
            }
        }
        table = null;
        super.stop();
    }
}
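
The listing references the hbase and htableName properties inherited from HBaseTableBean; they are expected to be wired externally, for example through the crawler's Spring configuration. The short sketch below shows one way the bean could be exercised once it has been wired. It is not part of the original source, and the row key, column family and values are made up for illustration.

package org.archive.modules.recrawl.hbase;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class SingleHBaseTableExample {

    // 'table' is assumed to be already configured with its HBase connection
    // and table name (the hbase/htableName fields used by openTable() above).
    public static void exercise(SingleHBaseTable table) throws IOException {
        table.setAutoFlush(true);                  // passed to HTable on (re)connect
        table.setReconnectInterval(3 * 60 * 1000); // 3-minute hold-off after errors
        table.start();                             // opens the shared HTable
        try {
            byte[] row = Bytes.toBytes("http://example.com/");
            Put put = new Put(row);
            // old-style HBase client API, matching the HTable/HTableInterface usage above
            put.add(Bytes.toBytes("f"), Bytes.toBytes("status"), Bytes.toBytes("200"));
            // during the hold-off period after a connection error, put() and get()
            // fail with IOException until the connection is reestablished.
            table.put(put);

            Result result = table.get(new Get(row));
            System.out.println("row empty? " + result.isEmpty());
        } finally {
            table.stop();                          // closes the shared HTable
        }
    }
}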