org.apache.hadoop.hbase.io.hfile.bucket.BucketAllocator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.io.hfile.bucket.BucketAllocator.java

Source

/**
 * Copyright The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile.bucket;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.BucketEntry;

/**
 * This class is used to allocate a block with a specified size and to free the
 * block when it is evicted. It manages an array of buckets; each bucket is
 * associated with an item size and caches elements up to that size. A completely
 * empty bucket can be re-assigned to a different size dynamically.
 *
 * This class is not thread safe.
 */
@InterfaceAudience.Private
public final class BucketAllocator {
    // Class-wide logger; dumpToLog() and logStatistics() write to it at INFO level.
    static final Log LOG = LogFactory.getLog(BucketAllocator.class);

    /**
     * A region of BUCKET_CAPACITY bytes starting at {@code baseOffset}, divided into
     * equally sized slots. The slot size is selected by {@link #reconfigure(int)};
     * the numbers of the currently free slots are kept as a stack in the first
     * {@code freeCount} entries of {@code freeList}.
     */
    private static final class Bucket {
        private long baseOffset;
        private int itemAllocationSize;
        private int sizeIndex;
        private int itemCount;
        private int[] freeList;
        private int freeCount;
        private int usedCount;

        /**
         * Create a bucket that has no slot size assigned yet.
         * @param offset position of the bucket's first byte
         */
        public Bucket(long offset) {
            this.baseOffset = offset;
            this.sizeIndex = -1; // marks the bucket as uninstantiated
        }

        /**
         * Assign a slot size to this bucket and mark every slot as free.
         * @param sizeIndex index into BUCKET_SIZES of the slot size to use
         */
        void reconfigure(int sizeIndex) {
            this.sizeIndex = sizeIndex;
            assert sizeIndex < BUCKET_SIZES.length;
            this.itemAllocationSize = BUCKET_SIZES[sizeIndex];
            this.itemCount = (int) (BUCKET_CAPACITY / itemAllocationSize);
            this.usedCount = 0;
            this.freeCount = itemCount;
            this.freeList = new int[itemCount];
            for (int slot = 0; slot < itemCount; slot++) {
                freeList[slot] = slot;
            }
        }

        /** @return true while no slot size has been assigned */
        public boolean isUninstantiated() {
            return -1 == sizeIndex;
        }

        /** @return index into BUCKET_SIZES of the current slot size, or -1 */
        public int sizeIndex() {
            return sizeIndex;
        }

        /** @return size in bytes of one slot */
        public int itemAllocationSize() {
            return itemAllocationSize;
        }

        /** @return true if at least one slot is free */
        public boolean hasFreeSpace() {
            return 0 < freeCount;
        }

        /** @return true if no slot is in use */
        public boolean isCompletelyFree() {
            return 0 == usedCount;
        }

        /** @return number of free slots */
        public int freeCount() {
            return freeCount;
        }

        /** @return number of slots in use */
        public int usedCount() {
            return usedCount;
        }

        /** @return bytes covered by the free slots */
        public int freeBytes() {
            return itemAllocationSize * freeCount;
        }

        /** @return bytes covered by the used slots */
        public int usedBytes() {
            return itemAllocationSize * usedCount;
        }

        /** @return position of the bucket's first byte */
        public long baseOffset() {
            return baseOffset;
        }

        /**
         * Take the slot on top of the free stack and hand out its position.
         * Must only be called while the bucket has free space.
         * @return the offset in the IOEngine
         */
        public long allocate() {
            assert freeCount > 0; // Else should not have been called
            assert sizeIndex != -1;
            usedCount++;
            freeCount--;
            int slot = freeList[freeCount];
            long position = baseOffset + (slot * itemAllocationSize);
            assert position >= 0;
            return position;
        }

        /**
         * Mark the slot located at the given absolute offset as used.
         * @param offset absolute offset of an allocation inside this bucket
         * @throws BucketAllocatorException if the offset lies before the bucket,
         *           is not aligned to a slot boundary, or its slot is not free
         */
        public void addAllocation(long offset) throws BucketAllocatorException {
            long relative = offset - baseOffset;
            boolean badOffset = relative < 0 || relative % itemAllocationSize != 0;
            if (badOffset) {
                throw new BucketAllocatorException("Attempt to add allocation for bad offset: " + relative
                        + " base=" + baseOffset + ", bucket size=" + itemAllocationSize);
            }
            int slot = (int) (relative / itemAllocationSize);

            // Locate the slot in the live part of the free list.
            int position = -1;
            for (int i = 0; i < freeCount; i++) {
                if (freeList[i] == slot) {
                    position = i;
                    break;
                }
            }
            if (position < 0) {
                throw new BucketAllocatorException("Couldn't find match for index " + slot + " in free list");
            }

            // Close the gap by shifting the entries behind it one place down.
            System.arraycopy(freeList, position + 1, freeList, position, freeCount - position - 1);
            usedCount++;
            freeCount--;
        }

        /**
         * Return the slot at the given absolute offset to the free stack.
         * @param offset absolute offset of a previously allocated slot
         */
        private void free(long offset) {
            long relative = offset - baseOffset;
            assert relative >= 0;
            assert relative < itemCount * itemAllocationSize;
            assert relative % itemAllocationSize == 0;
            assert usedCount > 0;
            assert freeCount < itemCount; // Else duplicate free
            int slot = (int) (relative / (long) itemAllocationSize);
            assert !freeListContains(slot);
            usedCount--;
            freeList[freeCount] = slot;
            freeCount++;
        }

        /** Linear scan over the live part of the free list. */
        private boolean freeListContains(int blockNo) {
            int i = 0;
            while (i < freeCount && freeList[i] != blockNo) {
                i++;
            }
            return i < freeCount;
        }
    }

    /**
     * Tracks every bucket that is currently configured with one particular item
     * size, together with the subsets that still have room.
     */
    final class BucketSizeInfo {
        // A "free" bucket still has room for at least one more block;
        // a "completely free" bucket holds no block at all.
        private List<Bucket> bucketList;
        private List<Bucket> freeBuckets;
        private List<Bucket> completelyFreeBuckets;
        private int sizeIndex;

        BucketSizeInfo(int sizeIndex) {
            this.sizeIndex = sizeIndex;
            this.bucketList = new ArrayList<Bucket>();
            this.freeBuckets = new ArrayList<Bucket>();
            this.completelyFreeBuckets = new ArrayList<Bucket>();
        }

        /**
         * Configure the given (unused) bucket for this size and start tracking it.
         * @param b a bucket that is uninstantiated or holds no block
         */
        public void instantiateBucket(Bucket b) {
            assert b.isUninstantiated() || b.isCompletelyFree();
            b.reconfigure(sizeIndex);
            // A freshly configured bucket belongs to all three lists.
            completelyFreeBuckets.add(b);
            freeBuckets.add(b);
            bucketList.add(b);
        }

        /** @return index into BUCKET_SIZES of the size handled here */
        public int sizeIndex() {
            return sizeIndex;
        }

        /**
         * Find a bucket to allocate a block. A bucket of this size with free
         * space is preferred; otherwise a completely free bucket is taken over
         * from any size and reconfigured.
         * @return the offset in the IOEngine, or -1 if no bucket is available
         */
        public long allocateBlock() {
            Bucket target = freeBuckets.isEmpty() ? null : freeBuckets.get(freeBuckets.size() - 1);
            if (target == null) {
                target = grabGlobalCompletelyFreeBucket();
                if (target == null) {
                    return -1;
                }
                instantiateBucket(target);
            }
            long offset = target.allocate();
            blockAllocated(target);
            return offset;
        }

        /**
         * Update the free lists after a block has been placed into the bucket.
         * @param b the bucket that received the block
         */
        void blockAllocated(Bucket b) {
            boolean holdsBlocks = !b.isCompletelyFree();
            boolean exhausted = !b.hasFreeSpace();
            if (holdsBlocks) {
                completelyFreeBuckets.remove(b);
            }
            if (exhausted) {
                freeBuckets.remove(b);
            }
        }

        /**
         * Give up one completely free bucket so that it can be reused elsewhere.
         * @return the removed bucket, or null if there is none to spare
         */
        public Bucket findAndRemoveCompletelyFreeBucket() {
            assert bucketList.size() > 0;
            if (bucketList.size() == 1) {
                // So we never get complete starvation of a bucket for a size
                return null;
            }
            if (completelyFreeBuckets.isEmpty()) {
                return null;
            }
            Bucket spare = completelyFreeBuckets.get(0);
            removeBucket(spare);
            return spare;
        }

        /** Stop tracking a bucket that holds no block. */
        private void removeBucket(Bucket b) {
            assert b.isCompletelyFree();
            completelyFreeBuckets.remove(b);
            freeBuckets.remove(b);
            bucketList.remove(b);
        }

        /**
         * Release the block at the given offset and update the free lists.
         * @param b the bucket containing the block
         * @param offset absolute offset of the block
         */
        public void freeBlock(Bucket b, long offset) {
            assert bucketList.contains(b);
            // else we shouldn't have anything to free...
            assert (!completelyFreeBuckets.contains(b));
            b.free(offset);
            boolean alreadyListedAsFree = freeBuckets.contains(b);
            if (!alreadyListedAsFree) {
                freeBuckets.add(b);
            }
            if (b.isCompletelyFree()) {
                completelyFreeBuckets.add(b);
            }
        }

        /**
         * @return free and used slot counts summed over all buckets of this size
         */
        public IndexStatistics statistics() {
            long freeSlots = 0;
            long usedSlots = 0;
            for (Bucket bucket : bucketList) {
                freeSlots += bucket.freeCount();
                usedSlots += bucket.usedCount();
            }
            return new IndexStatistics(freeSlots, usedSlots, BUCKET_SIZES[sizeIndex]);
        }
    }

    // Item sizes a bucket can be configured with, in ascending order;
    // roundUpToBucketSizeInfo() picks the first entry that is large enough.
    // The default block size is 64K, so more sizes are provided near 64K. You
    // should adjust them according to your cluster's block size distribution.
    // TODO Make these sizes configurable
    // TODO Support the view of block size distribution statistics
    private static final int BUCKET_SIZES[] = { 4 * 1024 + 1024, 8 * 1024 + 1024, 16 * 1024 + 1024,
            32 * 1024 + 1024, 40 * 1024 + 1024, 48 * 1024 + 1024, 56 * 1024 + 1024, 64 * 1024 + 1024,
            96 * 1024 + 1024, 128 * 1024 + 1024, 192 * 1024 + 1024, 256 * 1024 + 1024, 384 * 1024 + 1024,
            512 * 1024 + 1024 };

    /**
     * Find the BucketSizeInfo of the first (smallest) bucket size that can hold
     * a block of the given size.
     * @param blockSize size of the block to place
     * @return the matching BucketSizeInfo, or null if the block is larger than
     *         every bucket size
     */
    public BucketSizeInfo roundUpToBucketSizeInfo(int blockSize) {
        int index = 0;
        while (index < BUCKET_SIZES.length && blockSize > BUCKET_SIZES[index]) {
            index++;
        }
        return index < BUCKET_SIZES.length ? bucketSizeInfos[index] : null;
    }

    // 512K plus 1K (presumably per-block overhead), i.e. 513K in total; this is
    // the same value as the last entry of BUCKET_SIZES
    static final int BIG_ITEM_SIZE = (512 * 1024) + 1024;
    // Number of BIG_ITEM_SIZE items that fit into one bucket
    static public final int FEWEST_ITEMS_IN_BUCKET = 4;
    // The capacity size for each bucket
    static final long BUCKET_CAPACITY = FEWEST_ITEMS_IN_BUCKET * BIG_ITEM_SIZE;

    // All buckets; bucket i starts at offset BUCKET_CAPACITY * i
    private Bucket[] buckets;
    // One entry per element of BUCKET_SIZES, at the same index
    private BucketSizeInfo[] bucketSizeInfos;
    // buckets.length * BUCKET_CAPACITY
    private final long totalSize;
    // Sum of the item sizes of all blocks currently allocated
    private long usedSize = 0;

    /**
     * Create an allocator over the given amount of space. The space is split
     * into buckets of BUCKET_CAPACITY bytes; any remainder smaller than one
     * bucket is not used. The first BUCKET_SIZES.length buckets get one size
     * each, all remaining buckets start out with the largest size.
     * @param availableSpace capacity of cache in bytes
     * @throws BucketAllocatorException if the space does not hold at least one
     *           bucket per bucket size, or holds more buckets than an array can
     *           index
     */
    BucketAllocator(long availableSpace) throws BucketAllocatorException {
        // Validate the count as a long: narrowing it to int first would turn a
        // negative or very large space into a bogus (wrapped) bucket count.
        long bucketCount = availableSpace / BUCKET_CAPACITY;
        if (bucketCount < BUCKET_SIZES.length)
            throw new BucketAllocatorException("Bucket allocator size too small - must have room for at least "
                    + BUCKET_SIZES.length + " buckets");
        if (bucketCount > Integer.MAX_VALUE)
            throw new BucketAllocatorException("Bucket allocator size too big - " + availableSpace
                    + " bytes would need " + bucketCount + " buckets");
        buckets = new Bucket[(int) bucketCount];
        bucketSizeInfos = new BucketSizeInfo[BUCKET_SIZES.length];
        for (int i = 0; i < BUCKET_SIZES.length; ++i) {
            bucketSizeInfos[i] = new BucketSizeInfo(i);
        }
        for (int i = 0; i < buckets.length; ++i) {
            buckets[i] = new Bucket(BUCKET_CAPACITY * i);
            // One bucket per size first; everything beyond that goes to the largest size.
            bucketSizeInfos[i < BUCKET_SIZES.length ? i : BUCKET_SIZES.length - 1].instantiateBucket(buckets[i]);
        }
        this.totalSize = ((long) buckets.length) * BUCKET_CAPACITY;
    }

    /**
     * Rebuild the allocator's data structures from a persisted map.
     * @param availableSpace capacity of cache
     * @param map A map stores the block key and BucketEntry(block's meta data
     *          like offset, length)
     * @param realCacheSize cached data size statistics for bucket cache; the
     *          length of every restored block is added to it
     * @throws BucketAllocatorException if the space is too small, or an entry
     *           of the map does not fit the bucket layout (unknown size, offset
     *           outside the cache, conflicting sizes within one bucket, or a
     *           slot that is not free)
     */
    BucketAllocator(long availableSpace, Map<BlockCacheKey, BucketEntry> map, AtomicLong realCacheSize)
            throws BucketAllocatorException {
        this(availableSpace);

        // Each bucket has an offset and a size index. The default layout set up
        // above probably does not match the persisted blocks, so a bucket is
        // reconfigured the first time a block is found in it. A bucket may only
        // be reconfigured once; a later block of a different size in the same
        // bucket means the data is inconsistent, and we throw.
        boolean[] reconfigured = new boolean[buckets.length];
        for (Map.Entry<BlockCacheKey, BucketEntry> entry : map.entrySet()) {
            long foundOffset = entry.getValue().offset();
            int foundLen = entry.getValue().getLength();
            int bucketSizeIndex = -1;
            for (int i = 0; i < BUCKET_SIZES.length; ++i) {
                if (foundLen <= BUCKET_SIZES[i]) {
                    bucketSizeIndex = i;
                    break;
                }
            }
            if (bucketSizeIndex == -1) {
                throw new BucketAllocatorException("Can't match bucket size for the block with size " + foundLen);
            }
            // Range-check the bucket number as a long: narrowing it to int first
            // could wrap a corrupt, very large offset onto a valid bucket.
            long bucketIndex = foundOffset / BUCKET_CAPACITY;
            if (bucketIndex < 0 || bucketIndex >= buckets.length)
                throw new BucketAllocatorException("Can't find bucket " + bucketIndex + ", total buckets="
                        + buckets.length + "; did you shrink the cache?");
            int bucketNo = (int) bucketIndex;
            Bucket b = buckets[bucketNo];
            if (reconfigured[bucketNo]) {
                if (b.sizeIndex() != bucketSizeIndex)
                    throw new BucketAllocatorException("Inconsistent allocation in bucket map;");
            } else {
                if (!b.isCompletelyFree())
                    throw new BucketAllocatorException(
                            "Reconfiguring bucket " + bucketNo + " but it's already allocated; corrupt data");
                // Need to remove the bucket from whichever list it's currently in at
                // the moment...
                BucketSizeInfo bsi = bucketSizeInfos[bucketSizeIndex];
                BucketSizeInfo oldbsi = bucketSizeInfos[b.sizeIndex()];
                oldbsi.removeBucket(b);
                bsi.instantiateBucket(b);
                reconfigured[bucketNo] = true;
            }
            realCacheSize.addAndGet(foundLen);
            b.addAllocation(foundOffset);
            usedSize += b.itemAllocationSize();
            bucketSizeInfos[bucketSizeIndex].blockAllocated(b);
        }
    }

    /**
     * @return one line per bucket listing its item size and its free and used
     *         slot counts
     */
    public String getInfo() {
        StringBuilder report = new StringBuilder(1024);
        int bucketNo = 0;
        for (Bucket bucket : buckets) {
            report.append("    Bucket ").append(bucketNo).append(": ").append(bucket.itemAllocationSize())
                    .append(" freeCount=").append(bucket.freeCount())
                    .append(" used=").append(bucket.usedCount())
                    .append('\n');
            bucketNo++;
        }
        return report.toString();
    }

    /** @return sum of the item sizes of all blocks currently allocated */
    public long getUsedSize() {
        return usedSize;
    }

    /** @return total size minus used size */
    public long getFreeSize() {
        return totalSize - usedSize;
    }

    /** @return number of bytes covered by all buckets */
    public long getTotalSize() {
        return totalSize;
    }

    /**
     * Allocate a block with specified size. Return the offset
     * @param blockSize size of block
     * @throws BucketAllocatorException if the block is larger than every bucket size
     * @throws CacheFullException if no bucket can take the block; the caller
     *           is expected to free up space and try again
     * @return the offset in the IOEngine
     */
    public synchronized long allocateBlock(int blockSize) throws CacheFullException, BucketAllocatorException {
        assert blockSize > 0;
        final BucketSizeInfo sizeInfo = roundUpToBucketSizeInfo(blockSize);
        if (sizeInfo == null) {
            throw new BucketAllocatorException("Allocation too big size=" + blockSize);
        }

        final long allocatedAt = sizeInfo.allocateBlock();
        if (allocatedAt < 0) {
            // Ask caller to free up space and try again!
            throw new CacheFullException(blockSize, sizeInfo.sizeIndex());
        }

        // Account for the whole slot, not just the requested block size.
        usedSize += BUCKET_SIZES[sizeInfo.sizeIndex()];
        return allocatedAt;
    }

    /**
     * Ask each bucket size in turn to give up a completely free bucket.
     * @return the first bucket handed over, or null if no size can spare one
     */
    private Bucket grabGlobalCompletelyFreeBucket() {
        Bucket spare = null;
        for (int i = 0; spare == null && i < bucketSizeInfos.length; i++) {
            spare = bucketSizeInfos[i].findAndRemoveCompletelyFreeBucket();
        }
        return spare;
    }

    /**
     * Free a block with the offset
     * @param offset block's offset
     * @return size freed, i.e. the item size of the bucket that held the block
     */
    public synchronized int freeBlock(long offset) {
        final int bucketNo = (int) (offset / BUCKET_CAPACITY);
        assert 0 <= bucketNo && bucketNo < buckets.length;
        final Bucket owner = buckets[bucketNo];
        final BucketSizeInfo sizeInfo = bucketSizeInfos[owner.sizeIndex()];
        sizeInfo.freeBlock(owner, offset);
        final int freedSize = owner.itemAllocationSize();
        usedSize -= freedSize;
        return freedSize;
    }

    /**
     * @param offset offset of an allocation
     * @return size index of the bucket that contains the offset
     */
    public int sizeIndexOfAllocation(long offset) {
        final int bucketNo = (int) (offset / BUCKET_CAPACITY);
        assert 0 <= bucketNo && bucketNo < buckets.length;
        return buckets[bucketNo].sizeIndex();
    }

    /**
     * @param offset offset of an allocation
     * @return item size of the bucket that contains the offset
     */
    public int sizeOfAllocation(long offset) {
        final int bucketNo = (int) (offset / BUCKET_CAPACITY);
        assert 0 <= bucketNo && bucketNo < buckets.length;
        return buckets[bucketNo].itemAllocationSize();
    }

    /**
     * @return the number of bucket sizes; note that this is one more than the
     *         largest valid size index
     */
    public static int getMaximumAllocationIndex() {
        final int sizeCount = BUCKET_SIZES.length;
        return sizeCount;
    }

    /**
     * Free/used item counts for one item size, with byte totals derived from
     * the counts and the item size.
     */
    static class IndexStatistics {
        private long freeCount;
        private long usedCount;
        private long itemSize;
        private long totalCount;

        /** Create statistics with free and used counts of -1 and an item size of 0. */
        public IndexStatistics() {
            this(-1, -1, 0);
        }

        public IndexStatistics(long free, long used, long itemSize) {
            setTo(free, used, itemSize);
        }

        /**
         * Overwrite all values; the total count is recomputed as free + used.
         */
        public void setTo(long free, long used, long itemSize) {
            this.freeCount = free;
            this.usedCount = used;
            this.totalCount = free + used;
            this.itemSize = itemSize;
        }

        public long itemSize() {
            return itemSize;
        }

        public long freeCount() {
            return freeCount;
        }

        public long usedCount() {
            return usedCount;
        }

        public long totalCount() {
            return totalCount;
        }

        public long freeBytes() {
            return itemSize * freeCount;
        }

        public long usedBytes() {
            return itemSize * usedCount;
        }

        public long totalBytes() {
            return itemSize * totalCount;
        }
    }

    /**
     * Log the statistics, followed by a single message that lists the base
     * offset, size index, free/used counts and free list of every bucket.
     */
    public void dumpToLog() {
        logStatistics();
        StringBuilder sb = new StringBuilder();
        for (Bucket bucket : buckets) {
            sb.append("Bucket:").append(bucket.baseOffset).append('\n');
            sb.append("  Size index: ").append(bucket.sizeIndex())
                    .append("; Free:").append(bucket.freeCount)
                    .append("; used:").append(bucket.usedCount)
                    .append("; freelist\n");
            // Only the first freeCount entries of the free list are live.
            int liveEntries = bucket.freeCount();
            for (int slot = 0; slot < liveEntries; slot++) {
                sb.append(bucket.freeList[slot]).append(',');
            }
            sb.append('\n');
        }
        LOG.info(sb);
    }

    /**
     * Log the overall free/used/total byte counts, followed by one line per
     * bucket size with its used, free and total item counts.
     */
    public void logStatistics() {
        IndexStatistics total = new IndexStatistics();
        IndexStatistics[] stats = getIndexStatistics(total);
        LOG.info("Bucket allocator statistics follow:\n");
        // The separator used to be "+; used bytes=", which printed a stray '+'
        // directly after the free byte count.
        LOG.info("  Free bytes=" + total.freeBytes() + "; used bytes=" + total.usedBytes() + "; total bytes="
                + total.totalBytes());
        for (IndexStatistics s : stats) {
            LOG.info("  Object size " + s.itemSize() + " used=" + s.usedCount() + "; free=" + s.freeCount()
                    + "; total=" + s.totalCount());
        }
    }

    /**
     * Collect the statistics of every bucket size and also fill in an overall
     * total.
     * @param grandTotal receives the summed free and used bytes, with an item
     *          size of 1 so that its counts are byte counts
     * @return statistics per bucket size, indexed like the bucket sizes
     */
    public IndexStatistics[] getIndexStatistics(IndexStatistics grandTotal) {
        final IndexStatistics[] perSize = getIndexStatistics();
        long freeBytes = 0;
        long usedBytes = 0;
        for (int i = 0; i < perSize.length; i++) {
            freeBytes += perSize[i].freeBytes();
            usedBytes += perSize[i].usedBytes();
        }
        grandTotal.setTo(freeBytes, usedBytes, 1);
        return perSize;
    }

    /**
     * @return freshly computed statistics per bucket size, indexed like the
     *         bucket sizes
     */
    public IndexStatistics[] getIndexStatistics() {
        final int sizeCount = BUCKET_SIZES.length;
        final IndexStatistics[] perSize = new IndexStatistics[sizeCount];
        int index = 0;
        while (index < sizeCount) {
            perSize[index] = bucketSizeInfos[index].statistics();
            index++;
        }
        return perSize;
    }

    /**
     * Free every block whose offset is listed.
     * @param freeList offsets of the blocks to free
     * @return total size freed
     */
    public long freeBlock(long[] freeList) {
        long freedTotal = 0;
        for (long offset : freeList) {
            freedTotal += freeBlock(offset);
        }
        return freedTotal;
    }

}