LoggingBufferManager.java :  » Database-DBMS » mckoi » com » mckoi » store » Java Open Source

Java Open Source » Database DBMS » mckoi 
mckoi » com » mckoi » store » LoggingBufferManager.java
/**
 * com.mckoi.store.LoggingBufferManager  10 Jun 2003
 *
 * Mckoi SQL Database ( http://www.mckoi.com/database )
 * Copyright (C) 2000, 2001, 2002  Diehl and Associates, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * Version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License Version 2 for more details.
 *
 * You should have received a copy of the GNU General Public License
 * Version 2 along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Change Log:
 * 
 * 
 */

package com.mckoi.store;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Arrays;
import java.io.IOException;
import java.io.File;
import com.mckoi.debug.DebugLogger;
import com.mckoi.debug.Lvl;

/**
 * A paged random access buffer manager that caches access between a Store and
 * the underlying filesystem and that also handles check point logging and
 * crash recovery (via a JournalledSystem object).
 *
 * @author Tobias Downer
 */

public class LoggingBufferManager {

  /**
   * Set to true for extra assertions.
   */
  private static boolean PARANOID_CHECKS = false;
  
  /**
   * A timer that represents the T value in buffer pages.
   */
  private long current_T;

  /**
   * The number of pages in this buffer.
   */
  private int current_page_count;
  
  /**
   * The list of all pages.
   */
  private ArrayList page_list;
  
  /**
   * A lock used when accessing the current_T, page_list and current_page_count
   * members.
   */
  private final Object T_lock = new Object();
  
  /**
   * A hash map of all pages currently in memory keyed by store_id and page
   * number.
   * NOTE: This MUST be final for the 'fetchPage' method to be safe.
   */
  private final BMPage[] page_map;
  
  /**
   * A unique id key counter for all stores using this buffer manager.
   */
  private int unique_id_seq;

  /**
   * The JournalledSystem object that handles journalling of all data.
   */
  private JournalledSystem journalled_system;

  /**
   * The maximum number of pages that should be kept in memory before pages
   * are written out to disk.
   */
  private final int max_pages;

  /**
   * The size of each page.
   */
  private final int page_size;

  // ---------- Write locks ----------
  
  /**
   * Set to true when a 'setCheckPoint' is in progress.
   */
  private boolean check_point_in_progress;
  
  /**
   * The number of write locks currently on the buffer.  Any number of write
   * locks can be obtained, however a 'setCheckpoint' can only be achieved
   * when there are no write operations in progress.
   */
  private int write_lock_count;

  /**
   * A mutex for when modifying the write lock information.
   */
  private final Object write_lock = new Object();
  
  
//  /**
//   * The number of cache hits.
//   */
//  private long cache_hit_count;
//
//  /**
//   * The number of cache misses.
//   */
//  private long cache_miss_count;


  
  /**
   * Constructs the manager.
   */
  public LoggingBufferManager(File journal_path, boolean read_only,
                              int max_pages, int page_size,
                              StoreDataAccessorFactory sda_factory,
                              DebugLogger debug, boolean enable_logging) {
    this.max_pages = max_pages;
    this.page_size = page_size;

    check_point_in_progress = false;
    write_lock_count = 0;
    
    current_T = 0;
    page_list = new ArrayList();
    page_map = new BMPage[257];
    unique_id_seq = 0;

    journalled_system = new JournalledSystem(journal_path, read_only,
                               page_size, sda_factory, debug, enable_logging);
  }

  /**
   * Constructs the manager with a scattering store implementation that
   * converts the resource to a file in the given path.
   */
  public LoggingBufferManager(final File resource_path,
        final File journal_path, final boolean read_only, final int max_pages,
        final int page_size, final String file_ext, final long max_slice_size,
        DebugLogger debug, boolean enable_logging) {
    this(journal_path, read_only, max_pages, page_size,
         new StoreDataAccessorFactory() {
      public StoreDataAccessor createStoreDataAccessor(String resource_name) {
        return new ScatteringStoreDataAccessor(resource_path, resource_name,
                                               file_ext, max_slice_size);
      }
    }, debug, enable_logging);
  }

  /**
   * Starts the buffer manager.
   */
  public void start() throws IOException {
    journalled_system.start();
  }

  /**
   * Stops the buffer manager.
   */
  public void stop() throws IOException {
    journalled_system.stop();
  }

  // ----------

  /**
   * Creates a new resource.
   */
  JournalledResource createResource(String resource_name) {
    return journalled_system.createResource(resource_name);
  }

  /**
   * Obtains a write lock on the buffer.  This will block if a 'setCheckPoint'
   * is in progress, otherwise it will always succeed.
   */
  public void lockForWrite() throws InterruptedException {
    synchronized (write_lock) {
      while (check_point_in_progress) {
        write_lock.wait();
      }
      ++write_lock_count;
    }
  }

  /**
   * Releases a write lock on the buffer.  This MUST be called if the
   * 'lockForWrite' method is called.  This should be called from a 'finally'
   * clause.
   */
  public void unlockForWrite() {
    synchronized (write_lock) {
      --write_lock_count;
      write_lock.notifyAll();
    }
  }

  /**
   * Sets a check point in the log.  This logs a point in which a recovery
   * process should at least be able to be rebuild back to.  This will block
   * if there are any write locks.
   * <p>
   * Some things to keep in mind when using this.  You must ensure that no
   * writes can occur while this operation is occuring.  Typically this will
   * happen at the end of a commit but you need to ensure that nothing can
   * happen in the background, such as records being deleted or items being
   * inserted.  It is required that the 'no write' restriction is enforced at
   * a high level.  If care is not taken then the image written will not be
   * clean and if a crash occurs the image that is recovered will not be
   * stable.
   */
  public void setCheckPoint(boolean flush_journals)
                                     throws IOException, InterruptedException {

    // Wait until the writes have finished, and then set the
    // 'check_point_in_progress' boolean.
    synchronized (write_lock) {
      while (write_lock_count > 0) {
        write_lock.wait();
      }
      check_point_in_progress = true;
    }
    
    try {
//      System.out.println("SET CHECKPOINT");
      synchronized (page_map) {
        // Flush all the pages out to the log.
        for (int i = 0; i < page_map.length; ++i) {
          BMPage page = page_map[i];
          BMPage prev = null;

          while (page != null) {
            boolean deleted_hash = false;
            synchronized (page) {
              // Flush the page (will only actually flush if there are changes)
              page.flush();

              // Remove this page if it is no longer in use
              if (page.notInUse()) {
                deleted_hash = true;
                if (prev == null) {
                  page_map[i] = page.hash_next;
                }
                else {
                  prev.hash_next = page.hash_next;
                }
              }

            }
            // Go to next page in hash chain
            if (!deleted_hash) {
              prev = page;
            }
            page = page.hash_next;
          }
        }
      }

      journalled_system.setCheckPoint(flush_journals);

    }
    finally {
      // Make sure we unset the 'check_point_in_progress' boolean and notify
      // any blockers.
      synchronized (write_lock) {
        check_point_in_progress = false;
        write_lock.notifyAll();
      }
    }

  }


  /**
   * Called when a new page is created.
   */
  private void pageCreated(final BMPage page) throws IOException {
    synchronized (T_lock) {

      if (PARANOID_CHECKS) {
        int i = page_list.indexOf(page);
        if (i != -1) {
          BMPage f = (BMPage) page_list.get(i);
          if (f == page) {
            throw new Error("Same page added multiple times.");
          }
          if (f != null) {
            throw new Error("Duplicate pages.");
          }
        }
      }

      page.t = current_T;
      ++current_T;

      ++current_page_count;
      page_list.add(page);

      // Below is the page purge algorithm.  If the maximum number of pages
      // has been created we sort the page list weighting each page by time
      // since last accessed and total number of accesses and clear the bottom
      // 20% of this list.

      // Check if we should purge old pages and purge some if we do...
      if (current_page_count > max_pages) {
        // Purge 20% of the cache
        // Sort the pages by the current formula,
        //  ( 1 / page_access_count ) * (current_t - page_t)
        // Further, if the page has written data then we multiply by 0.75.
        // This scales down page writes so they have a better chance of
        // surviving in the cache than page writes.
        Object[] pages = page_list.toArray();
        Arrays.sort(pages, PAGE_CACHE_COMPARATOR);

        int purge_size = Math.max((int) (pages.length * 0.20f), 2);
        for (int i = 0; i < purge_size; ++i) {
          BMPage dpage = (BMPage) pages[pages.length - (i + 1)];
          synchronized (dpage) {
            dpage.dispose();
          }
        }

        // Remove all the elements from page_list and set it with the sorted
        // list (minus the elements we removed).
        page_list.clear();
        for (int i = 0; i < pages.length - purge_size; ++i) {
          page_list.add(pages[i]);
        }

        current_page_count -= purge_size;
      
      }
    }
  }
  
  /**
   * Called when a page is accessed.
   */
  private void pageAccessed(BMPage page) {
    synchronized (T_lock) {
      page.t = current_T;
      ++current_T;
      ++page.access_count;
    }
  }
  
  /**
   * Calculates a hash code given an id value and a page_number value.
   */
  private static int calcHashCode(long id, long page_number) {
    return (int) ((id << 6) + (page_number * ((id + 25) << 2)));
  }
  
  /**
   * Fetches and returns a page from a store.  Pages may be cached.  If the
   * page is not available in the cache then a new BMPage object is created
   * for the page requested.
   */
  private BMPage fetchPage(JournalledResource data,
                           final long page_number) throws IOException {
    final long id = data.getID();

    BMPage prev_page = null;
    boolean new_page = false;
    BMPage page;

    synchronized (page_map) {
      // Generate the hash code for this page.
      final int p = (calcHashCode(id, page_number) & 0x07FFFFFFF) %
                                                             page_map.length;
      // Search for this page in the hash
      page = page_map[p];
      while (page != null && !page.isPage(id, page_number)) {
        prev_page = page;
        page = page.hash_next;
      }

      // Page isn't found so create it and add to the cache
      if (page == null) {
        page = new BMPage(data, page_number, page_size);
        // Add this page to the map
        page.hash_next = page_map[p];
        page_map[p] = page;
      }
      else {
        // Move this page to the head if it's not already at the head.
        if (prev_page != null) {
          prev_page.hash_next = page.hash_next;
          page.hash_next = page_map[p];
          page_map[p] = page;
        }
      }

      synchronized (page) {
        // If page not in use then it must be newly setup, so add a
        // reference.
        if (page.notInUse()) {
          page.reset();
          new_page = true;
          page.referenceAdd();
        }
        // Add a reference for this fetch
        page.referenceAdd();
      }

    }

    // If the page is new,
    if (new_page) {
      pageCreated(page);
    }
    else {
      pageAccessed(page);
    }

    // Return the page.
    return page;

  }


  // ------
  // Buffered access methods.  These are all thread safe methods.  When a page
  // is accessed the page is synchronized so no 2 or more operations can
  // read/write from the page at the same time.  An operation can read/write to
  // different pages at the same time, however, and this requires thread safety
  // at a lower level (in the JournalledResource implementation).
  // ------

  int readByteFrom(JournalledResource data, long position) throws IOException {
    final long page_number = position / page_size;
    int v;

    BMPage page = fetchPage(data, page_number);
    synchronized (page) {
      try {
        page.initialize();
        v = ((int) page.read((int) (position % page_size))) & 0x0FF;
      }
      finally {
        page.dispose();
      }
    }

    return v;
  }
  
  int readByteArrayFrom(JournalledResource data,
              long position, byte[] buf, int off, int len) throws IOException {

    final int orig_len = len;
    long page_number = position / page_size;
    int start_offset = (int) (position % page_size);
    int to_read = Math.min(len, page_size - start_offset);

    BMPage page = fetchPage(data, page_number);
    synchronized (page) {
      try {
        page.initialize();
        page.read(start_offset, buf, off, to_read);
      }
      finally {
        page.dispose();
      }
    }

    len -= to_read;
    while (len > 0) {
      off += to_read;
      position += to_read;
      ++page_number;
      to_read = Math.min(len, page_size);

      page = fetchPage(data, page_number);
      synchronized (page) {
        try {
          page.initialize();
          page.read(0, buf, off, to_read);
        }
        finally {
          page.dispose();
        }
      }
      len -= to_read;
    }

    return orig_len;
  }
  
  void writeByteTo(JournalledResource data,
                   long position, int b) throws IOException {

    if (PARANOID_CHECKS) {
      synchronized (write_lock) {
        if (write_lock_count == 0) {
          System.out.println("Write without a lock!");
          new Error().printStackTrace();
        }
      }
    }

    final long page_number = position / page_size;

    BMPage page = fetchPage(data, page_number);
    synchronized (page) {
      try {
        page.initialize();
        page.write((int) (position % page_size), (byte) b);
      }
      finally {
        page.dispose();
      }
    }
  }

  void writeByteArrayTo(JournalledResource data,
              long position, byte[] buf, int off, int len) throws IOException {

    if (PARANOID_CHECKS) {
      synchronized (write_lock) {
        if (write_lock_count == 0) {
          System.out.println("Write without a lock!");
          new Error().printStackTrace();
        }
      }
    }

    long page_number = position / page_size;
    int start_offset = (int) (position % page_size);
    int to_write = Math.min(len, page_size - start_offset);

    BMPage page = fetchPage(data, page_number);
    synchronized (page) {
      try {
        page.initialize();
        page.write(start_offset, buf, off, to_write);
      }
      finally {
        page.dispose();
      }
    }
    len -= to_write;

    while (len > 0) {
      off += to_write;
      position += to_write;
      ++page_number;
      to_write = Math.min(len, page_size);

      page = fetchPage(data, page_number);
      synchronized (page) {
        try {
          page.initialize();
          page.write(0, buf, off, to_write);
        }
        finally {
          page.dispose();
        }
      }
      len -= to_write;
    }

  }

  void setDataAreaSize(JournalledResource data,
                       long new_size) throws IOException {
    data.setSize(new_size);
  }

  long getDataAreaSize(JournalledResource data) throws IOException {
    return data.getSize();
  }

  void close(JournalledResource data) throws IOException {
    long id = data.getID();
    // Flush all changes made to the resource then close.
    synchronized (page_map) {
//      System.out.println("Looking for id: " + id);
      // Flush all the pages out to the log.
      // This scans the entire hash for values and could be an expensive
      // operation.  Fortunately 'close' isn't used all that often.
      for (int i = 0; i < page_map.length; ++i) {
        BMPage page = page_map[i];
        BMPage prev = null;

        while (page != null) {
          boolean deleted_hash = false;
          if (page.getID() == id) {
//            System.out.println("Found page id: " + page.getID());
            synchronized (page) {
              // Flush the page (will only actually flush if there are changes)
              page.flush();

              // Remove this page if it is no longer in use
              if (page.notInUse()) {
                deleted_hash = true;
                if (prev == null) {
                  page_map[i] = page.hash_next;
                }
                else {
                  prev.hash_next = page.hash_next;
                }
              }
            }

          }

          // Go to next page in hash chain
          if (!deleted_hash) {
            prev = page;
          }
          page = page.hash_next;

        }
      }
    }

    data.close();
  }
  


  // ---------- Inner classes ----------

  /**
   * A page from a store that is currently being cached in memory.  This is
   * also an element in the cache.
   */
  private static final class BMPage {

    /**
     * The StoreDataAccessor that the page content is part of.
     */
    private final JournalledResource data;
    
    /**
     * The page number.
     */
    private final long page;

    /**
     * The size of the page.
     */
    private final int page_size;
    

    /**
     * The buffer that contains the data for this page.
     */
    private byte[] buffer;

    /**
     * True if this page is initialized.
     */
    private boolean initialized;


    /**
     * A reference to the next page with this hash key.
     */
    BMPage hash_next;


    /**
     * The time this page was last accessed.  This value is reset each time
     * the page is requested.
     */
    long t;

    /**
     * The number of times this page has been accessed since it was created.
     */
    int access_count;


    /**
     * The first position in the buffer that was last written.
     */
    private int first_write_position;

    /**
     * The last position in the buffer that was last written.
     */
    private int last_write_position;

    /**
     * The number of references on this page.
     */
    private int reference_count;


    /**
     * Constructs the page.
     */
    BMPage(JournalledResource data, long page, int page_size) {
      this.data = data;
      this.page = page;
      this.reference_count = 0;
      this.page_size = page_size;
      reset();
    }

    /**
     * Resets this object.
     */
    void reset() {
      // Assert that this is 0
      if (reference_count != 0) {
        throw new Error("reset when 'reference_count' is != 0 ( = " +
                        reference_count + " )");
      }
      this.initialized = false;
      this.t = 0;
      this.access_count = 0;
    }
    
    /**
     * Returns the id of the JournalledResource that is being buffered.
     */
    long getID() {
      return data.getID();
    }
    
    /**
     * Adds 1 to the reference counter on this page.
     */
    void referenceAdd() {
      ++reference_count;
    }

    /**
     * Removes 1 from the reference counter on this page.
     */
    private void referenceRemove() {
      if (reference_count <= 0) {
        throw new Error("Too many reference remove.");
      }
      --reference_count;
    }

    /**
     * Returns true if this PageBuffer is not in use (has 0 reference count and
     * is not inialized.
     */
    boolean notInUse() {
      return reference_count == 0;
//      return (reference_count <= 0 && !initialized);
    }

    /**
     * Returns true if this page matches the given id/page_number.
     */
    boolean isPage(long in_id, long in_page) {
      return (getID() == in_id &&
              page == in_page);
    }
    
    /**
     * Reads the current page content into memory.  This may read from the
     * data files or from a log.
     */
    private void readPageContent(
                   long page_number, byte[] buf, int pos) throws IOException {
      if (pos != 0) {
        throw new Error("Assert failed: pos != 0");
      }
      // Read from the resource
      data.read(page_number, buf, pos);
    }

    /**
     * Flushes this page out to disk, but does not remove from memory.  In a
     * logging system this will flush the changes out to a log.
     */
    void flush() throws IOException {
      if (initialized) {
        if (last_write_position > -1) {
          // Write to the store data.
          data.write(page, buffer, first_write_position,
                     last_write_position - first_write_position);
//          System.out.println("FLUSH " + data + " off = " + first_write_position +
//                             " len = " + (last_write_position - first_write_position));
        }
        first_write_position = Integer.MAX_VALUE;
        last_write_position = -1;
      }
    }
    
    /**
     * Initializes the page buffer.  If the buffer is already initialized then
     * we just return.  If it's not initialized we set up any internal
     * structures that are required to be set up for access to this page.
     */
    void initialize() throws IOException {
      if (!initialized) {

        try {

          // Create the buffer to contain the page in memory
          buffer = new byte[page_size];
          // Read the page.  This will either read the page from the backing
          // store or from a log.
          readPageContent(page, buffer, 0);
          initialized = true;

//          access_count = 0;
          first_write_position = Integer.MAX_VALUE;
          last_write_position = -1;

        }
        catch (IOException e) {
          // This makes debugging a little clearer if 'readPageContent' fails.
          // When 'readPageContent' fails, the dispose method fails also.
          System.out.println("IO Error during page initialize: " + e.getMessage());
          e.printStackTrace();
          throw e;
        }

      }
    }

    /**
     * Disposes of the page buffer if it can be disposed (there are no
     * references to the page and the page is initialized).  When disposed the
     * memory used by the page is reclaimed and the content is written out to
     * disk.
     */
    void dispose() throws IOException {
      referenceRemove();
      if (reference_count == 0) {
        if (initialized) {
          
          // Flushes the page from memory.  This will write the page out to the
          // log.
          flush();

          // Page is no longer initialized.
          initialized = false;
          // Clear the buffer from memory.
          buffer = null;

        }
        else {
          // This happens if initialization failed.  If this case we don't
          // flush out the changes, but we do allow the page to be disposed
          // in the normal way.
          // Note that any exception generated by the initialization failure
          // will propogate correctly.
          buffer = null;
//          throw new RuntimeException(
//                "Assertion failed: tried to dispose an uninitialized page.");
        }
      }
    }


    /**
     * Reads a single byte from the cached page from memory.
     */
    byte read(int pos) {
      return buffer[pos];
    }

    /**
     * Reads a part of this page into the cached page from memory.
     */
    void read(int pos, byte[] buf, int off, int len) {
      System.arraycopy(buffer, pos, buf, off, len);
    }

    /**
     * Writes a single byte to the page in memory.
     */
    void write(int pos, byte v) {
      first_write_position = Math.min(pos, first_write_position);
      last_write_position = Math.max(pos + 1, last_write_position);

      buffer[pos] = v;
    }

    /**
     * Writes to the given part of the page in memory.
     */
    void write(int pos, byte[] buf, int off, int len) {
      first_write_position = Math.min(pos, first_write_position);
      last_write_position = Math.max(pos + len, last_write_position);

      System.arraycopy(buf, off, buffer, pos, len);
    }

    public boolean equals(Object ob) {
      BMPage dest_page = (BMPage) ob;
      return isPage(dest_page.getID(), dest_page.page);
    }
    
  }

  /**
   * A data resource that is being buffered.
   */
  private static class BResource {

    /**
     * The id assigned to the resource.
     */
    private final long id;

    /**
     * The unique name of the resource within the store.
     */
    private final String name;

    /**
     * Constructs the resource.
     */
    BResource(long id, String name) {
      this.id = id;
      this.name = name;
    }

    /**
     * Returns the id assigned to this resource.
     */
    long getID() {
      return id;
    }

    /**
     * Returns the name of this resource.
     */
    String getName() {
      return name;
    }

  }

  /**
   * A Comparator used to sort cache entries.
   */
  private final Comparator PAGE_CACHE_COMPARATOR = new Comparator() {

    /**
     * The calculation for finding the 'weight' of a page in the cache.  A
     * heavier page is sorted lower and is therefore cleared from the cache
     * faster.
     */
    private final float pageEnumValue(BMPage page) {
      // We fix the access counter so it can not exceed 10000 accesses.  I'm
      // a little unsure if we should put this constant in the equation but it
      // ensures that some old but highly accessed page will not stay in the
      // cache forever.
      final long bounded_page_count = Math.min(page.access_count, 10000);
      final float v = (1f / bounded_page_count) * ( current_T - page.t );
      return v;
    }

    public int compare(Object ob1, Object ob2) {
      float v1 = pageEnumValue((BMPage) ob1);
      float v2 = pageEnumValue((BMPage) ob2);
      if (v1 > v2) {
        return 1;
      }
      else if (v1 < v2) {
        return -1;
      }
      return 0;
    }

  };

  /**
   * A factory interface for creating StoreDataAccessor objects from resource
   * names.
   */
  public static interface StoreDataAccessorFactory {
    
    /**
     * Returns a StoreDataAccessor object for the given resource name.
     */
    public StoreDataAccessor createStoreDataAccessor(String resource_name);
    
  }
  
  
}

java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.