org.carrot2.core.CachingProcessingComponentManager.java Source code

Java tutorial

Introduction

Here is the source code for org.carrot2.core.CachingProcessingComponentManager.java

Source

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2015, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;

import org.carrot2.core.Controller.IControllerStatisticsProvider;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.Pair;
import org.carrot2.util.attribute.*;

import com.google.common.cache.*;
import com.google.common.collect.*;
import com.google.common.util.concurrent.UncheckedExecutionException;

/**
 * An {@link IProcessingComponentManager} that implements processing results caching
 * functionality.
 * <p>
 * This manager wraps a delegate manager (e.g. a
 * {@link SimpleProcessingComponentManager} or a
 * {@link PoolingProcessingComponentManager}) and wraps the components prepared by the
 * delegate with functionality that either returns the results from the cache or
 * performs the processing if the results are not yet cached.
 * </p>
 */
public class CachingProcessingComponentManager
        implements IProcessingComponentManager, Controller.IControllerStatisticsProvider {
    /** The delegate manager that prepares the actual processing components. */
    final IProcessingComponentManager delegate;

    /**
     * Descriptors of {@link Input} {@link Processing} and {@link Output} attributes of
     * components whose output is to be cached, keyed by the (component class,
     * component id) pair. The map is read and written inside a block synchronized on
     * the map itself.
     */
    private final Map<Pair<Class<? extends IProcessingComponent>, String>, InputOutputAttributeDescriptors> cachedComponentAttributeDescriptors = Maps
            .newHashMap();

    /**
     * Classes of {@link IProcessingComponent}s whose data should be cached internally.
     * A requested component class is matched against these with
     * {@link Class#isAssignableFrom(Class)}, so subtypes of the listed classes are
     * cached as well.
     */
    final Set<Class<? extends IProcessingComponent>> cachedComponentClasses;

    /**
     * Populates on-demand and caches the data from components of classes provided in
     * {@link #cachedComponentClasses}. The key of the cache is a map of all {@link Input}
     * {@link Processing} attributes of the component for which caching is performed,
     * extended with the component's class and identifier. The value of the cache is the
     * map of attributes collected while the component performed processing.
     * <p>
     * The reference is not final because {@link #dispose()} sets it to <code>null</code>.
     * </p>
     */
    private Cache<AttributeMapCacheKey, Map<String, Object>> cache;

    /** Statistics key under which {@link #getStatistics()} reports the cache miss count. */
    static final String CACHE_MISSES = "cache.misses";
    /** Statistics key under which {@link #getStatistics()} reports the cache hit count. */
    static final String CACHE_HITS_TOTAL = "cache.hits.total";

    /**
     * Creates a {@link CachingProcessingComponentManager}.
     * 
     * @param delegate the manager to handle the preparation of the actual processing
     *            component instances
     * @param cachedComponentClasses classes of components whose output should be cached
     *            by the controller. If a superclass is provided here, e.g.
     *            {@link IDocumentSource}, all its subclasses will be subject to caching.
     *            If {@link IProcessingComponent} is provided here, output of all
     *            components will be cached.
     */
    public CachingProcessingComponentManager(IProcessingComponentManager delegate,
            Class<? extends IProcessingComponent>... cachedComponentClasses) {
        this.delegate = delegate;
        this.cachedComponentClasses = ImmutableSet.copyOf(cachedComponentClasses);

        // Initialize cache.
        cache = CacheBuilder.newBuilder().maximumSize(100).recordStats().build();
    }

    /**
     * Initializes the manager. The call is handed over to the delegate manager with all
     * arguments unchanged; this class performs no initialization of its own here.
     *
     * @param context the controller context, passed through to the delegate
     * @param attributes the initialization attributes, passed through to the delegate
     * @param configurations the component configurations, passed through to the delegate
     */
    public void init(IControllerContext context, Map<String, Object> attributes,
            ProcessingComponentConfiguration... configurations) {
        this.delegate.init(context, attributes, configurations);
    }

    /**
     * Prepares a component for processing. If the requested class is assignable to any
     * of the {@link #cachedComponentClasses}, a caching wrapper is returned instead of
     * a component obtained from the delegate.
     * <p>
     * Implementation note: the creation of the wrapper is the only moment at which all
     * input attributes (including the processing-time ones) can be handed to it. The
     * controller must therefore pass the complete set of attributes at this stage, even
     * though the delegate manager will likely use only the init-time ones. Likewise,
     * the output attributes will be collected into the map provided here.
     * </p>
     *
     * @param clazz class of the component to prepare
     * @param id identifier of the component
     * @param inputAttributes all input attributes, including processing-time ones
     * @param outputAttributes the map to collect output attributes into
     * @return a caching wrapper if the component is subject to caching, otherwise the
     *         component prepared by the delegate
     */
    public IProcessingComponent prepare(Class<? extends IProcessingComponent> clazz, String id,
            Map<String, Object> inputAttributes, Map<String, Object> outputAttributes) {
        // Scan the configured classes until one of them turns out to be a supertype
        // of (or the same as) the requested component class.
        boolean cachingRequested = false;
        final Iterator<Class<? extends IProcessingComponent>> candidates = cachedComponentClasses.iterator();
        while (!cachingRequested && candidates.hasNext()) {
            cachingRequested = candidates.next().isAssignableFrom(clazz);
        }

        if (!cachingRequested) {
            // Not subject to caching: hand out the original component.
            return delegate.prepare(clazz, id, inputAttributes, outputAttributes);
        }

        return new CachedProcessingComponent(clazz, id, inputAttributes, outputAttributes);
    }

    /**
     * Returns a component after processing. Components that are not caching wrappers
     * created by this manager are passed to the delegate for recycling; the wrappers
     * themselves need no recycling.
     *
     * @param component the component to recycle
     * @param id identifier of the component
     */
    @Override
    public void recycle(IProcessingComponent component, String id) {
        final boolean ownWrapper = component instanceof CachedProcessingComponent;
        if (ownWrapper) {
            // Nothing to do: the actual components behind the wrapper are recycled
            // in ValueProducer, right after they have been asked to perform processing.
            return;
        }

        delegate.recycle(component, id);
    }

    /**
     * Disposes of the delegate manager and then drops all cached entries. The cache
     * reference is cleared in all cases, also when the delegate's disposal or the
     * invalidation fails with an exception.
     */
    public void dispose() {
        try {
            delegate.dispose();

            if (cache == null) {
                // Already disposed of, nothing to invalidate.
                return;
            }
            cache.invalidateAll();
        } finally {
            cache = null;
        }
    }

    /**
     * Collects statistics of this manager: the statistics of the delegate (if the
     * delegate is an {@link IControllerStatisticsProvider}) complemented with the cache
     * miss and hit counters stored under {@link #CACHE_MISSES} and
     * {@link #CACHE_HITS_TOTAL}.
     *
     * @return a new map with the statistics
     */
    public Map<String, Object> getStatistics() {
        final CacheStats snapshot = cache.stats();

        final Map<String, Object> result = new HashMap<String, Object>();
        if (delegate instanceof IControllerStatisticsProvider) {
            final IControllerStatisticsProvider provider = (IControllerStatisticsProvider) delegate;
            result.putAll(provider.getStatistics());
        }

        // Cache counters go in last, so they replace same-named delegate entries.
        result.put(CACHE_MISSES, snapshot.missCount());
        result.put(CACHE_HITS_TOTAL, snapshot.hitCount());
        return result;
    }

    // Two extra attributes to add to the input map. This way, they will also become
    // part of the cache key, which is what we need: results of different component
    // classes or identifiers never share a cache entry built from the same inputs.
    private static final String COMPONENT_CLASS_KEY = CachingProcessingComponentManager.class.getName()
            + ".componentClass";
    private static final String COMPONENT_ID_KEY = CachingProcessingComponentManager.class.getName()
            + ".componentId";

    /**
     * Input attribute requesting a cache bypass. When the attribute is present and the
     * string representation of its value is <code>"true"</code> (ignoring case), the
     * entry cached for the request's key is invalidated before the cache is queried,
     * so the stale value is dropped and the result is produced anew. Other values do
     * not trigger a bypass.
     */
    public static final String CACHE_BYPASS_ATTR = CachingProcessingComponentManager.class.getName()
            + ".cacheBypass";

    /**
     * A stub component that stands in for a component whose results are cached. When
     * asked to process, it fetches the results from the cache (which computes them by
     * means of a {@link ValueProducer} if they are not there yet) and adds them to the
     * output attribute map.
     */
    private final class CachedProcessingComponent extends ProcessingComponentBase {
        private final Class<? extends IProcessingComponent> componentClass;
        private final String componentId;

        /** All input attributes, including processing-time ones. */
        private final Map<String, Object> inputAttributes;

        /** A map to store the output attributes in. */
        private final Map<String, Object> outputAttributes;

        /**
         * @param componentClass class of the actual component this stub stands in for
         * @param componentId identifier of the actual component
         * @param inputAttributes all input attributes; this map is modified during
         *            {@link #process()}
         * @param outputAttributes the map the results will be added to
         */
        CachedProcessingComponent(Class<? extends IProcessingComponent> componentClass, String componentId,
                Map<String, Object> inputAttributes, Map<String, Object> outputAttributes) {
            this.componentClass = componentClass;
            this.inputAttributes = inputAttributes;
            this.outputAttributes = outputAttributes;
            this.componentId = componentId;
        }

        /**
         * Delivers the results of the actual component, either from the cache or, on a
         * cache miss or requested bypass, by running the actual component.
         *
         * @throws ProcessingException wrapping the cause of any failure reported by the
         *             cache while producing the value
         */
        @Override
        public void process() throws ProcessingException {
            final InputOutputAttributeDescriptors descriptors = prepareAttributeDescriptors();

            // Copy the output attributes produced by the preceding components. Normally,
            // this could be done by ControllerUtils, but the wrapper was created before
            // any processing took place anyway, so the inputAttributes did not have any
            // results yet.
            inputAttributes.putAll(outputAttributes);

            // We'll need @Input @Processing attributes for the cache key
            final Map<String, Object> inputProcessingAttributes = getAttributesForDescriptors(
                    descriptors.inputProcessingDescriptors, inputAttributes);

            // Plus component class and id
            inputProcessingAttributes.put(COMPONENT_CLASS_KEY, componentClass);
            inputProcessingAttributes.put(COMPONENT_ID_KEY, componentId);

            // Get data from cache. If the result is not in the cache yet, it will
            // be created by the ValueProducer.
            final AttributeMapCacheKey key = new AttributeMapCacheKey(inputProcessingAttributes, inputAttributes);

            // Cache bypass: drop the possibly stale entry, the lookup below will
            // then recompute it.
            if (isCacheBypassRequested()) {
                cache.invalidate(key);
            }

            try {
                final Map<String, Object> processingResult = cache.get(key, new ValueProducer(key));

                // Copy the resulting @Output attributes back to the result
                outputAttributes
                        .putAll(getAttributesForDescriptors(descriptors.outputDescriptors, processingResult));
            } catch (UncheckedExecutionException e) {
                throw ExceptionUtils.wrapAs(ProcessingException.class, e.getCause());
            } catch (ExecutionException e) {
                throw ExceptionUtils.wrapAs(ProcessingException.class, e.getCause());
            }
        }

        /**
         * Tells whether the input attributes request a cache bypass, that is whether
         * {@link CachingProcessingComponentManager#CACHE_BYPASS_ATTR} is present with a
         * value whose string representation is <code>"true"</code> (ignoring case). A
         * missing attribute or a <code>null</code> value means no bypass.
         */
        private boolean isCacheBypassRequested() {
            final Object bypass = inputAttributes.get(CACHE_BYPASS_ATTR);
            return bypass != null && Boolean.parseBoolean(bypass.toString());
        }

        /**
         * Returns descriptors of the {@link Input} {@link Processing} attributes and of
         * the {@link Output} attributes of the component whose results will be cached.
         * The descriptors are built on first use from a component instance borrowed
         * from the delegate manager and then remembered per (component class,
         * component id) pair.
         */
        @SuppressWarnings("unchecked")
        private InputOutputAttributeDescriptors prepareAttributeDescriptors() {
            final Pair<Class<? extends IProcessingComponent>, String> descriptorsKey = new Pair<Class<? extends IProcessingComponent>, String>(
                    componentClass, componentId);

            InputOutputAttributeDescriptors descriptors = null;

            synchronized (cachedComponentAttributeDescriptors) {
                descriptors = cachedComponentAttributeDescriptors.get(descriptorsKey);
                if (descriptors == null) {
                    // Need to borrow a component for a while to build descriptors
                    IProcessingComponent component = null;
                    try {
                        component = delegate.prepare(componentClass, componentId, inputAttributes,
                                Maps.<String, Object>newHashMap());

                        // Build and store descriptors
                        descriptors = new InputOutputAttributeDescriptors(
                                BindableDescriptorBuilder.buildDescriptor(component)
                                        .only(Input.class, Processing.class).flatten().attributeDescriptors,
                                BindableDescriptorBuilder.buildDescriptor(component).only(Output.class)
                                        .flatten().attributeDescriptors);

                        cachedComponentAttributeDescriptors.put(descriptorsKey, descriptors);
                    } finally {
                        // Give the borrowed component back, also when building failed.
                        if (component != null) {
                            delegate.recycle(component, componentId);
                        }
                    }
                }
            }

            return descriptors;
        }

        /**
         * Returns a new map containing only those entries of <code>attributes</code>
         * whose keys correspond to the provided descriptors. Keys absent from
         * <code>attributes</code> are skipped; keys mapped to <code>null</code> are
         * copied.
         */
        Map<String, Object> getAttributesForDescriptors(final Map<String, AttributeDescriptor> inputDescriptors,
                Map<String, Object> attributes) {
            final Map<String, Object> attributesForDescriptors = Maps.newHashMap();
            for (AttributeDescriptor descriptor : inputDescriptors.values()) {
                if (attributes.containsKey(descriptor.key)) {
                    attributesForDescriptors.put(descriptor.key, attributes.get(descriptor.key));
                }
            }
            return attributesForDescriptors;
        }
    }

    /**
     * A compound cache key built from the input attributes map. The hash code is
     * computed once, at construction time, so later modifications of the attributes map
     * or of its values do not change the hashCode and equality behavior of the key.
     */
    private static final class AttributeMapCacheKey {
        /** Input processing attributes, the actual key for the cache. */
        private Map<String, Object> inputProcessingAttributes;

        /** Hash code of the input processing attributes, frozen at construction time. */
        private int hashCode;

        /**
         * All input attributes. Not part of the cache key, but carried along because
         * they are needed to produce the value when the entry is not in the cache yet.
         */
        private Map<String, Object> inputAttributes;

        private AttributeMapCacheKey(Map<String, Object> inputProcessingAttributes,
                Map<String, Object> inputAttributes) {
            // The map always holds at least the component identifiers, so an empty
            // one indicates a programming error.
            assert inputProcessingAttributes != null && inputProcessingAttributes.size() > 0;

            // A shallow copy would not protect against modifications of the objects
            // stored in the map, and a deep copy is what we want to avoid. Wrapping
            // the map as unmodifiable at least prevents the simple mistakes.
            final Map<String, Object> readOnlyView = Collections.unmodifiableMap(inputProcessingAttributes);

            this.inputAttributes = inputAttributes;
            this.hashCode = inputProcessingAttributes.hashCode();
            this.inputProcessingAttributes = readOnlyView;
        }

        /*
         * Equality is decided on hash codes alone. This is not correct when two
         * different attribute maps happen to have the same hash code, but a real
         * comparison would require deep copies of the attribute map. Should such a
         * conflict happen, a stale result would be returned from the cache (a result
         * of a different query, possibly even of a different component). Given that the
         * cache is in-memory only and rather small (its job is to avoid duplicated
         * processing when re-querying for documents and clusters), conflicts do not seem
         * like a big problem.
         */
        @Override
        public boolean equals(Object obj) {
            if (obj instanceof AttributeMapCacheKey) {
                if (obj.hashCode() != this.hashCode) {
                    return false;
                }

                // Development-time sanity check: same hash, so the maps should match.
                final AttributeMapCacheKey other = (AttributeMapCacheKey) obj;
                assert other.inputProcessingAttributes.equals(this.inputProcessingAttributes);
                return true;
            }

            return false;
        }

        @Override
        public int hashCode() {
            return this.hashCode;
        }
    }

    /**
     * Produces the value for a cache key by running the actual component. The cache
     * invokes this producer only when it holds no value for the requested key.
     */
    private final class ValueProducer implements Callable<Map<String, Object>> {
        private final AttributeMapCacheKey key;

        public ValueProducer(AttributeMapCacheKey key) {
            this.key = key;
        }

        /**
         * Borrows the actual component from the delegate manager, lets it perform
         * processing and returns the map of attributes collected in the course of it.
         * The component is recycled with the delegate whether or not processing
         * succeeded.
         */
        @Override
        public Map<String, Object> call() throws Exception {
            // Component class and id travel as two extra entries of the key's map.
            final Map<String, Object> keyAttributes = key.inputProcessingAttributes;

            @SuppressWarnings("unchecked")
            final Class<? extends IProcessingComponent> type = (Class<? extends IProcessingComponent>) keyAttributes
                    .get(COMPONENT_CLASS_KEY);
            final String id = (String) keyAttributes.get(COMPONENT_ID_KEY);

            final Map<String, Object> results = Maps.newHashMap();
            IProcessingComponent borrowed = null;
            try {
                borrowed = delegate.prepare(type, id, key.inputAttributes, results);
                ControllerUtils.performProcessing(borrowed, keyAttributes, results);
                return results;
            } finally {
                if (borrowed != null) {
                    delegate.recycle(borrowed, id);
                }
            }
        }
    }

    /**
     * An immutable holder for two descriptor maps of a component: the descriptors used
     * to select the attributes forming the cache key and the descriptors used to select
     * the attributes copied from the cached result.
     */
    private static final class InputOutputAttributeDescriptors {
        /** Descriptors of the attributes that make up the cache key. */
        final Map<String, AttributeDescriptor> inputProcessingDescriptors;

        /** Descriptors of the attributes copied back from the processing result. */
        final Map<String, AttributeDescriptor> outputDescriptors;

        InputOutputAttributeDescriptors(Map<String, AttributeDescriptor> inputProcessingDescriptors,
                Map<String, AttributeDescriptor> outputDescriptors) {
            this.outputDescriptors = outputDescriptors;
            this.inputProcessingDescriptors = inputProcessingDescriptors;
        }
    }
}