CachedBdbMap.java :  » Web-Crawler » heritrix » org » archive » util » Java Open Source

Java Open Source » Web Crawler » heritrix 
heritrix » org » archive » util » CachedBdbMap.java
/* CachedBdbMap
 * 
 * $Id: CachedBdbMap.java 4926 2007-02-21 06:02:36Z gojomo $
 * 
 * Created on Mar 24, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.util;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.lang.ref.PhantomReference;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.lang.reflect.Field;
import java.util.AbstractMap;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.SerialBinding;
import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.collections.StoredSortedMap;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;

/**
 * A BDB JE backed hashmap. It extends the normal BDB JE map implementation by
 * holding a cache of softly referenced objects. That is, cached objects are
 * not written back to disk until they are no longer referenced by any other
 * object and can therefore be garbage collected.
 * 
 * @author John Erik Halse
 * @author stack
 * @author gojomo
 *  
 */
public class CachedBdbMap<K,V> extends AbstractMap<K,V> 
implements Map<K,V>, Serializable {
    
    private static final long serialVersionUID = -8655539411367047332L;

    /** Logger shared by all instances of this class. */
    private static final Logger logger =
        Logger.getLogger(CachedBdbMap.class.getName());

    /** The database name of the class definition catalog.*/
    private static final String CLASS_CATALOG = "java_class_catalog";

    /**
     * A map of BDB JE Environments, keyed by the absolute path of the
     * database directory, so that we reuse the Environment for
     * databases in the same directory.
     */
    private static final Map<String,DbEnvironmentEntry> dbEnvironmentMap = 
        new HashMap<String,DbEnvironmentEntry>();

    /**
     * The shared BDB JE environment entry used for this instance.
     * Only assigned by the constructor that takes a database directory.
     */
    private transient DbEnvironmentEntry dbEnvironment;

    /** The BDB JE database used for this instance. */
    protected transient Database db;

    /** The Collection view of the BDB JE database used for this instance. */
    protected transient StoredSortedMap diskMap;

    /** The softreferenced cache: key to soft-reference wrapper of the value. */
    private transient Map<K,SoftEntry<V>> memMap;

    /**
     * Queue handed to every SoftEntry created for the cache; polled to
     * discover entries whose soft reference has been cleared.
     */
    protected transient ReferenceQueue<V> refQueue;

    /** The number of objects in the diskMap StoredMap, maintained by
     *  put, remove and clear. Not transient, so it is serialized with the
     *  instance. (Protected access for unit testing.) */
    protected int diskMapSize = 0;

    /**
     * Count of times we got an object from in-memory cache.
     */
    private long cacheHit = 0;

    /**
     * Count of times the {@link CachedBdbMap#get(Object)} method was called
     * (including the internal calls made by put and remove).
     */
    private long countOfGets = 0;

    /**
     * Count of every time we went to the disk-based map AND we found an
     * object (Doesn't include accesses that came back null).
     */
    private long diskHit = 0;
    
    /**
     * Name of bdbje db.
     */
    private String dbName = null;

    /**
     * Reference to the private <code>referent</code> field declared by
     * {@link Reference}. It is looked up and made accessible once, in the
     * static initializer below, so that a phantom reference's referent can
     * be read reflectively (see PhantomEntry#doctoredGet()).
     */
    protected static Field referentField;
    static {
        // We need access to the referent field in the PhantomReference.
        // For more on this trick, see
        // http://www.javaspecialists.co.za/archive/Issue098.html and for
        // discussion:
        // http://www.theserverside.com/tss?service=direct/0/NewsThread/threadViewer.markNoisy.link&sp=l29865&sp=l146901
        // NOTE(review): this depends on a JDK-internal field name; newer
        // JDKs may restrict such reflective access -- confirm on the
        // target runtime. Any failure aborts class initialization.
        try {
            referentField = Reference.class.getDeclaredField("referent");
            referentField.setAccessible(true);
        } catch (SecurityException e) {
            throw new RuntimeException(e);
        } catch (NoSuchFieldException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Plain holder for the state kept per BDB JE environment: one entry is
     * shared by every map opened on the same database directory.
     */
    protected static class DbEnvironmentEntry {
        /** Directory the environment was opened on. */
        File dbDir;
        /** The open BDB JE environment. */
        Environment environment;
        /** Class catalog used for serial bindings in this environment. */
        StoredClassCatalog classCatalog;
        /** Number of maps currently holding this environment open. */
        int openDbCount;
    }
    
    /**
     * Shudown default constructor.
     */
    private CachedBdbMap() {
        super();
    }
    
    /**
     * First half of two-step construction: records the database name only.
     *
     * <p>Finish construction by calling
     * {@link #initialize(Environment, Class, Class, StoredClassCatalog)}.
     * The split exists so that a deserialized CachedBdbMap can be
     * reconnected with its backing bdbje database.
     *
     * @param dbName Name of the backing db this instance should use.
     */
    public CachedBdbMap(final String dbName) {
        this();
        this.dbName = dbName;
    }

    /**
     * A constructor for creating a new CachedBdbMap.
     * 
     * Even though the put and get methods conforms to the Collections interface
     * taking any object as key or value, you have to submit the class of the
     * allowed key and value objects here and will get an exception if you try
     * to put anything else in the map.
     * 
     * <p>This constructor internally calls
     * {@link #initialize(Environment, Class, Class, StoredClassCatalog)}.
     * Do not call initialize if you use this constructor.
     * 
     * @param dbDir The directory where the database will be created.
     * @param dbName The name of the database to back this map by.
     * @param keyClass The class of the objects allowed as keys.
     * @param valueClass The class of the objects allowed as values.
     * 
     * @throws DatabaseException is thrown if the underlying BDB JE database
     *             throws an exception.
     */
    public CachedBdbMap(final File dbDir, final String dbName,
            final Class<K> keyClass, final Class<V> valueClass)
    throws DatabaseException {
        this(dbName);
        this.dbEnvironment = getDbEnvironment(dbDir);
        this.dbEnvironment.openDbCount++;
        initialize(dbEnvironment.environment, keyClass, valueClass,
            dbEnvironment.classCatalog);
        if (logger.isLoggable(Level.INFO)) {
            // Write out the bdb configuration.
            EnvironmentConfig cfg = this.dbEnvironment.environment.getConfig();
            logger.info("BdbConfiguration: Cache percentage "  +
                cfg.getCachePercent() + ", cache size " + cfg.getCacheSize() +
                ", Map size: " + size());
        }
    }
    
    /**
     * Second half of two-step construction: resets the in-memory cache,
     * opens the backing database named at construction time and builds the
     * disk map view over it.
     *
     * <p>Call this after using the {@link #CachedBdbMap(String)}
     * constructor, or on a deserialized instance that should be reconnected
     * with an extant bdbje environment. Do not call it if you used the
     * {@link #CachedBdbMap(File, String, Class, Class)} constructor.
     *
     * @param env Environment in which to open the backing database.
     * @param keyClass Class of the keys.
     * @param valueClass Class of the values.
     * @param classCatalog Catalog used for serial bindings.
     * @throws DatabaseException if the database cannot be opened.
     */
    public synchronized void initialize(final Environment env, final Class keyClass,
            final Class valueClass, final StoredClassCatalog classCatalog)
    throws DatabaseException {
        initializeInstance();
        final Database opened = openDatabase(env, this.dbName);
        this.db = opened;
        this.diskMap = createDiskMap(opened, classCatalog, keyClass,
            valueClass);
    }
    
    /**
     * Create fresh transient state: an empty memory cache and a new
     * reference queue. Invoked from initialize and after deserialization.
     */
    protected void initializeInstance() {
        this.refQueue = new ReferenceQueue<V>();
        this.memMap = new HashMap<K,SoftEntry<V>>();
    }
    
    /**
     * Build the stored-map view over the given database.
     *
     * @param database Database backing the view.
     * @param classCatalog Catalog used when a serial binding is needed.
     * @param keyClass Class of the keys.
     * @param valueClass Class of the values.
     * @return a writable StoredSortedMap over <code>database</code>.
     */
    protected StoredSortedMap createDiskMap(Database database,
            StoredClassCatalog classCatalog, Class keyClass, Class valueClass) {
        final EntryBinding forKeys = bindingFor(classCatalog, keyClass);
        final EntryBinding forValues = bindingFor(classCatalog, valueClass);
        return new StoredSortedMap(database, forKeys, forValues, true);
    }

    /**
     * Pick the binding for a class: the primitive tuple binding when one is
     * available, otherwise a serial binding backed by the class catalog.
     */
    private EntryBinding bindingFor(StoredClassCatalog classCatalog,
            Class clazz) {
        final EntryBinding primitive = TupleBinding.getPrimitiveBinding(clazz);
        if (primitive != null) {
            return primitive;
        }
        return new SerialBinding(classCatalog, clazz);
    }

    /**
     * Get the database environment for a physical directory where data will be
     * stored.
     * <p>
     * If an environment is already registered for the directory it is
     * reused. Otherwise a new environment is created, its class catalog
     * database is opened, and only then is the entry registered, so a
     * partially opened environment is never handed out or cached.
     * 
     * @param dbDir The directory where BDB JE data will be stored.
     * @return a datastructure containing the environment and a default database
     *         for storing class definitions.
     * @throws DatabaseException if the environment or its class catalog
     *         cannot be opened.
     */
    private DbEnvironmentEntry getDbEnvironment(File dbDir)
    throws DatabaseException {
        final String path = dbDir.getAbsolutePath();
        final DbEnvironmentEntry existing = dbEnvironmentMap.get(path);
        if (existing != null) {
            return existing;
        }
        EnvironmentConfig envConfig = new EnvironmentConfig();
        envConfig.setAllowCreate(true);
        envConfig.setTransactional(false);
        
        // We're doing the caching ourselves so setting these at the lowest
        // possible level.
        envConfig.setCachePercent(1);
        DbEnvironmentEntry env = new DbEnvironmentEntry();
        env.dbDir = dbDir;
        env.environment = new Environment(dbDir, envConfig);

        Database catalogDb = null;
        try {
            DatabaseConfig dbConfig = new DatabaseConfig();
            dbConfig.setTransactional(false);
            dbConfig.setAllowCreate(true);
            dbConfig.setDeferredWrite(true);
            
            catalogDb = env.environment.openDatabase(null,
                    CLASS_CATALOG, dbConfig);
            
            env.classCatalog = new StoredClassCatalog(catalogDb);
        } catch (DatabaseException e) {
            // Release whatever was opened so the directory is not left
            // held by an environment nobody can reach. Cleanup is best
            // effort: the original failure is what the caller needs to see.
            try {
                if (catalogDb != null) {
                    catalogDb.close();
                }
                env.environment.close();
            } catch (Exception cleanupFailure) {
                logger.log(Level.WARNING, "Failed cleaning up environment in "
                    + path + " after open failure", cleanupFailure);
            }
            throw e;
        }
        // Register only a completely initialized entry.
        dbEnvironmentMap.put(path, env);
        return env;
    }

    /**
     * Open (creating if absent) the named non-transactional,
     * deferred-write database in the given environment.
     *
     * @param environment Environment to open the database in.
     * @param dbName Name of the database.
     * @return the opened database.
     * @throws DatabaseException if the open fails.
     */
    protected Database openDatabase(final Environment environment,
            final String dbName) throws DatabaseException {
        final DatabaseConfig config = new DatabaseConfig();
        config.setTransactional(false);
        config.setAllowCreate(true);
        config.setDeferredWrite(true);
        final Database opened = environment.openDatabase(null, dbName, config);
        return opened;
    }

    /**
     * Sync and close this map's database and release its hold on the
     * shared environment, closing the environment when this was the last
     * user.
     * <p>
     * Safe to call more than once (clear() and finalize() both call it):
     * the environment's open count is decremented only on the first call,
     * so a repeated close cannot shut down an environment that other maps
     * are still using.
     *
     * @throws DatabaseException if closing the shared class catalog or
     *         environment fails. Failures syncing or closing this map's own
     *         database are logged rather than thrown.
     */
    public synchronized void close() throws DatabaseException {
        // Close out my bdb db.
        final Database openDb = this.db;
        this.db = null;
        if (openDb != null) {
            try {
                try {
                    openDb.sync();
                } finally {
                    // Close even when the sync failed, so the handle
                    // is not leaked.
                    openDb.close();
                }
            } catch (DatabaseException e) {
                logger.log(Level.WARNING,
                    "Problem closing database " + this.dbName, e);
            }
        }

        final DbEnvironmentEntry env = this.dbEnvironment;
        if (env == null) {
            return;
        }
        // Drop our reference first so a second close() is a no-op here.
        this.dbEnvironment = null;
        env.openDbCount--;
        if (env.openDbCount > 0) {
            return;
        }
        // Last user: tear down the shared environment and unregister it
        // even if one of the close calls fails.
        try {
            if (env.classCatalog != null) {
                env.classCatalog.close();
            }
        } finally {
            try {
                if (env.environment != null) {
                    env.environment.close();
                }
            } finally {
                dbEnvironmentMap.remove(env.dbDir.getAbsolutePath());
            }
        }
    }

    /**
     * Last-chance cleanup: closes the backing database. The superclass
     * finalizer is run even if the close fails.
     *
     * @throws Throwable whatever close() or the superclass finalizer throws.
     */
    protected void finalize() throws Throwable {
        try {
            close();
        } finally {
            super.finalize();
        }
    }

    /**
     * Returns the key set of the disk map, which holds all relevant keys.
     *
     * @return the keys of the backing disk map.
     * @see java.util.Map#keySet()
     */
    @SuppressWarnings("unchecked")
    public Set<K> keySet() {
        final Set<K> diskKeys = diskMap.keySet();
        return diskKeys;
    }
    
    /**
     * Not supported: keeping identity guarantees between cached and stored
     * values would need a complicated implementation.
     *
     * @return never returns normally.
     * @throws UnsupportedOperationException always.
     */
    public Set<Map.Entry<K,V>> entrySet() {
        throw new UnsupportedOperationException();
    }

    /**
     * Look a key up, first in the soft-reference cache and then in the
     * backing disk map. A value found on disk is placed into the cache.
     * Also expunges stale cache entries and, every 10000th call, logs a
     * cache summary.
     *
     * @param object Key to look up.
     * @return the value for the key, or null if neither map has one.
     */
    public synchronized V get(final Object object) {
        final K key = toKey(object);
        countOfGets++;
        expungeStaleEntries();
        if (countOfGets % 10000 == 0) {
            logCacheSummary();
        }

        final SoftEntry<V> cached = memMap.get(key);
        if (cached != null) {
            // Take a strong reference so the value cannot be cleared
            // between the check and the return.
            final V held = cached.get();
            if (held != null) {
                cacheHit++;
                return held;
            }
            // The soft reference was cleared; expunge the entry right away
            // rather than waiting for it to come off the reference queue.
            expungeStaleEntry(cached);
        }

        // Fall back to the backing diskMap.
        final V fromDisk = diskMapGet(key);
        if (fromDisk == null) {
            return null;
        }
        diskHit++;
        memMap.put(key, new SoftEntry<V>(key, fromDisk, refQueue));
        return fromDisk;
    }

    /**
     * Log cache statistics at FINE level. Called from get() on every
     * 10000th lookup; does nothing unless FINE logging is enabled.
     * <p>
     * The hit percentage is reported as 0 when there have been no hits at
     * all, which avoids a division by zero when every lookup so far missed.
     */
    private void logCacheSummary() {
        if (!logger.isLoggable(Level.FINE)) {
            return;
        }
        final long hits = cacheHit + diskHit;
        final long cacheHitPercent = (hits == 0) ? 0 : (cacheHit * 100) / hits;
        // getDatabaseName() copes with a closed (null) db and with lookup
        // failures, so logging can never break a get().
        logger.fine("DB name: " + getDatabaseName()
            + ", Cache Hit: " + cacheHitPercent
            + "%, Not in map: " + (countOfGets - hits)
            + ", Total number of gets: " + countOfGets);
    }
    
    /**
     * Store a value under a key in both the memory cache and the disk map.
     * The size counter grows only when no previous value was found.
     *
     * @param key Key to store under.
     * @param value Value to store.
     * @return the value previously associated with the key, or null.
     */
    public synchronized V put(K key, V value) {
        final V previous = get(key);
        final boolean isNewKey = (previous == null);
        memMap.put(key, new SoftEntry<V>(key, value, refQueue));
        diskMap.put(key, value);
        if (isNewKey) {
            diskMapSize++;
        }
        return previous;
    }

    /**
     * Empty both the memory cache and the disk map, then CLOSE the
     * underlying bdbje database.
     * <p>
     * After this call the instance is of no further use until it is
     * re-initialized. The close happens here because, when this map is
     * used as a plain Map, clear() is the only opportunity for cleanup.
     * A failure to close is printed and otherwise ignored.
     */
    public synchronized void clear() {
        memMap.clear();
        diskMap.clear();
        diskMapSize = 0;
        try {
            close();
        } catch (DatabaseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Remove a key from both the memory cache and the disk map.
     * <p>
     * The size counter is decremented only when a value was actually
     * present, mirroring put(), which increments it only for new keys.
     * Removing an absent key therefore leaves size() unchanged.
     *
     * @param key Key to remove.
     * @return the value previously associated with the key, or null if
     *         there was none.
     */
    public synchronized V remove(final Object key) {
        final V prevValue = get(key);
        // Drop the cache entry first so that expunging cannot write a
        // stale value for this key back to disk.
        memMap.remove(key);
        expungeStaleEntries();
        diskMap.remove(key);
        if (prevValue != null) {
            diskMapSize--;
        }
        return prevValue;
    }

    /**
     * Test for a key, consulting the memory cache before the disk map.
     *
     * @param key Key to test for.
     * @return true if either the cache or the disk map contains the key.
     */
    public synchronized boolean containsKey(Object key) {
        return quickContainsKey(key) || diskMap.containsKey(key);
    }

    /**
     * Test for a key in the memory cache only, after expunging stale
     * entries. The disk map is not consulted.
     *
     * @param key Key to test for.
     * @return true if the memory cache contains the key.
     */
    public synchronized boolean quickContainsKey(Object key) {
        expungeStaleEntries();
        final boolean inMemory = memMap.containsKey(key);
        return inMemory;
    }

    /**
     * Test for a value, trying the quick in-memory check before asking
     * the disk map.
     *
     * @param value Value to test for.
     * @return true if either check finds the value.
     */
    public synchronized boolean containsValue(Object value) {
        return quickContainsValue(value) || diskMap.containsValue(value);
    }

    /**
     * Test whether a value is currently held in the memory cache, after
     * expunging stale entries. The disk map is not consulted.
     * <p>
     * The cache stores soft-reference wrappers, so each wrapper's referent
     * is compared with the given value; comparing against the wrappers
     * themselves could never match. Entries whose referent has been
     * cleared are skipped.
     *
     * @param value Value to test for; null never matches.
     * @return true if a live cache entry holds an equal value.
     */
    public synchronized boolean quickContainsValue(Object value) {
        expungeStaleEntries();
        if (value == null) {
            return false;
        }
        for (SoftEntry<V> entry : memMap.values()) {
            // Hold the referent strongly while comparing.
            final V held = entry.get();
            if (held != null && value.equals(held)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the tracked number of entries in the disk map.
     */
    public int size() {
        return this.diskMapSize;
    }
    
    /**
     * Name of the backing database, for use in log messages.
     *
     * @return the database's name, or a fixed placeholder when there is no
     *         open database or the lookup throws.
     */
    protected String getDatabaseName() {
        final String fallback = "DbName-Lookup-Failed";
        if (this.db == null) {
            return fallback;
        }
        try {
            return this.db.getDatabaseName();
        } catch (DatabaseException e) {
            // Only used for logging; fall back to the placeholder.
            return fallback;
        }
    }
    
    /**
     * Sync in-memory map entries to backing disk store.
     * <p>
     * Every live cache entry is written to the disk map and the database's
     * deferred writes are then flushed. Entries whose soft reference has
     * already been cleared are expunged from the memory map; live entries
     * stay cached after the call.
     *
     * @throws RuntimeException wrapping the DatabaseException if flushing
     *         the database fails.
     */
    public synchronized void sync() {
        // Resolve these up front so the closing log line is correct even
        // if the log level changes while the sync is running.
        final String name = getDatabaseName();
        final long startTime = System.currentTimeMillis();
        if (logger.isLoggable(Level.INFO)) {
            logger.info(name + " start sizes: disk " + this.diskMapSize +
                ", mem " + this.memMap.size());
        }
        expungeStaleEntries();

        // Expunging mutates memMap, so cleared entries are collected here
        // and expunged only after the iteration has finished.
        final LinkedList<SoftEntry<V>> stale = new LinkedList<SoftEntry<V>>();
        for (Map.Entry<K,SoftEntry<V>> mapping : this.memMap.entrySet()) {
            final SoftEntry<V> entry = mapping.getValue();
            if (entry == null) {
                continue;
            }
            // Get & hold so not cleared while being written.
            final V value = entry.get();
            if (value != null) {
                this.diskMap.put(mapping.getKey(), value);
            } else {
                stale.add(entry);
            }
        }
        // for any entries above that had been cleared, ensure expunged
        for (SoftEntry<V> entry : stale) {
            expungeStaleEntry(entry);
        }

        // force sync of deferred-writes
        try {
            this.db.sync();
        } catch (DatabaseException e) {
            throw new RuntimeException(e);
        }

        if (logger.isLoggable(Level.INFO)) {
            logger.info(name + " sync took " +
                (System.currentTimeMillis() - startTime) + "ms. " +
                "Finish sizes: disk " +
                this.diskMapSize + ", mem " + this.memMap.size());
        }
    }

    /**
     * Drain the reference queue, expunging every entry found on it, and
     * report the number expunged at FINER level when any were.
     */
    private void expungeStaleEntries() {
        int expunged = 0;
        SoftEntry stale = refQueuePoll();
        while (stale != null) {
            expungeStaleEntry(stale);
            expunged++;
            stale = refQueuePoll();
        }
        if (expunged == 0 || !logger.isLoggable(Level.FINER)) {
            return;
        }
        try {
            logger.finer("DB: " + db.getDatabaseName() + ",  Expunged: "
                    + expunged + ", Diskmap size: " + diskMapSize
                    + ", Cache size: " + memMap.size());
        } catch (DatabaseException e) {
            // Logging only; a failed name lookup is not worth reporting.
        }
    }
    
    /**
     * Expunge a single cache entry: if it is still the entry that memMap
     * holds for its key, remove it from memMap and write the referent
     * (read reflectively through the entry's phantom reference) to the
     * disk map. In every case the entry's references are then cleared.
     *
     * @param entry Entry to expunge; a no-op if it was already expunged.
     */
    private void expungeStaleEntry(SoftEntry entry) {
        // If phantom already null, its already expunged -- probably
        // because it was purged directly first from inside in
        // {@link #get(Object)} and then it went on the poll queue and
        // when it came off inside in expungeStaleEntries, this method
        // was called again.
        if (entry.getPhantom() == null) {
            return;
        }
        // If the object that is in memMap is not the one passed here, then
        // memMap has been changed -- probably by a put on top of this entry.
        // The comparison is by identity (==) on purpose: only the very same
        // entry object may be removed and written back.
        if (memMap.get(entry.getPhantom().getKey()) == entry) {
            memMap.remove(entry.getPhantom().getKey());
            diskMap.put(entry.getPhantom().getKey(),
                entry.getPhantom().doctoredGet());
        }
        // Clears the phantom and soft references and nulls the phantom,
        // which is what marks the entry as expunged for the check above.
        entry.clearPhantom();
    }
    
    /**
     * Phantom reference to a cached value that also remembers the key the
     * value is stored under. It is created without a reference queue.
     */
    private class PhantomEntry<T> extends PhantomReference<T> {
        /** Key under which the referent is mapped. */
        private final Object key;

        public PhantomEntry(Object key, T referent) {
            super(referent, null);
            this.key = key;
        }

        /**
         * Read the referent despite {@link #get()} always returning null
         * for a phantom reference. This cheats by reading the private
         * referent field reflectively through {@link #referentField},
         * which was looked up during static initialization of the
         * enclosing class.
         *
         * @return the value of this reference's referent field.
         */
        public Object doctoredGet() {
            final Object referent;
            try {
                referent = referentField.get(this);
            } catch (IllegalAccessException e) {
                throw new RuntimeException(e);
            }
            return referent;
        }

        /**
         * @return the key this entry was created with.
         */
        public Object getKey() {
            return key;
        }
    }

    /**
     * Soft reference to a cached value, registered with the given
     * reference queue and paired with a PhantomEntry for the same referent
     * and key. The phantom is what lets the key and referent be recovered
     * when the entry is expunged.
     */
    private class SoftEntry<T> extends SoftReference<T> {
        /** Companion phantom reference; null once this entry is expunged. */
        private PhantomEntry<T> phantom;

        /**
         * @param key Key the referent is cached under.
         * @param referent Value being cached.
         * @param q Queue this soft reference is registered with.
         */
        public SoftEntry(Object key, T referent, ReferenceQueue<T> q) {
            super(referent, q);
            this.phantom = new PhantomEntry<T>(key, referent);
        }

        /**
         * @return Returns the phantom reference, or null if
         * {@link #clearPhantom()} has been called.
         */
        public PhantomEntry getPhantom() {
            return this.phantom;
        }
        
        /**
         * Clear the phantom reference, drop it, and clear this soft
         * reference too. Must not be called twice: the phantom field is
         * null after the first call.
         */
        public void clearPhantom() {
            this.phantom.clear();
            this.phantom = null;
            super.clear();
        }
    }
    
    /**
     * Deserialization hook: restores the serialized fields, then rebuilds
     * the transient cache state. The backing database is not reopened here.
     *
     * @param stream Stream to read this instance from.
     * @throws IOException if reading the stream fails.
     * @throws ClassNotFoundException if a serialized class cannot be found.
     */
    private void readObject(java.io.ObjectInputStream stream)
    throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        initializeInstance();
        if (!logger.isLoggable(Level.FINE)) {
            return;
        }
        logger.fine(getDatabaseName() + " diskMapSize: " + diskMapSize);
    }
    
 
    
    /**
     * Unchecked cast of an arbitrary object to the key type.
     */
    @SuppressWarnings("unchecked")
    private K toKey(Object o) {
        final K key = (K) o;
        return key;
    }
    
    /**
     * Fetch a value from the disk map, cast (unchecked) to the value type.
     */
    @SuppressWarnings("unchecked")
    private V diskMapGet(K k) {
        final Object stored = diskMap.get(k);
        return (V) stored;
    }
    
    /**
     * Poll the reference queue without blocking.
     *
     * @return the next queued entry, or null when the queue is empty.
     */
    @SuppressWarnings("unchecked")
    private SoftEntry<V> refQueuePoll() {
        final Object polled = refQueue.poll();
        return (SoftEntry) polled;
    }
}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.