org.alfresco.repo.content.transform.AbstractContentTransformer2.java Source code

Java tutorial

Introduction

Here is the source code for org.alfresco.repo.content.transform.AbstractContentTransformer2.java

Source

/*
 * #%L
 * Alfresco Repository
 * %%
 * Copyright (C) 2005 - 2016 Alfresco Software Limited
 * %%
 * This file is part of the Alfresco software. 
 * If the software was purchased under a paid Alfresco license, the terms of 
 * the paid license agreement will prevail.  Otherwise, the software is 
 * provided under the following open source license terms:
 * 
 * Alfresco is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Alfresco is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */
package org.alfresco.repo.content.transform;

import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.alfresco.api.AlfrescoPublicApi;
import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.repo.content.AbstractStreamAwareProxy;
import org.alfresco.repo.content.StreamAwareContentReaderProxy;
import org.alfresco.repo.content.StreamAwareContentWriterProxy;
import org.alfresco.repo.content.metadata.AbstractMappingMetadataExtracter;
import org.alfresco.repo.content.metadata.MetadataExtracterConfig;
import org.alfresco.service.cmr.repository.ContentIOException;
import org.alfresco.service.cmr.repository.ContentReader;
import org.alfresco.service.cmr.repository.ContentServiceTransientException;
import org.alfresco.service.cmr.repository.ContentWriter;
import org.alfresco.service.cmr.repository.TransformationOptionLimits;
import org.alfresco.service.cmr.repository.TransformationOptions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Provides basic services for {@link org.alfresco.repo.content.transform.ContentTransformer}
 * implementations.
 * <p>
 * This class maintains the performance measures for the transformers as well, making sure that
 * there is an extra penalty for transformers that fail regularly.
 * 
 * @author Derek Hulley
 * @author Roy Wetherall
 */
@AlfrescoPublicApi
public abstract class AbstractContentTransformer2 extends AbstractContentTransformerLimits {
    /** Logger shared by every instance of this class and its subclasses. */
    private static final Log logger = LogFactory.getLog(AbstractContentTransformer2.class);

    /**
     * Executor used to run transformations that are limited by a timeout thread.
     * May be injected via {@link #setExecutorService(ExecutorService)}; otherwise it is
     * created on first use by {@link #getExecutorService()}.
     */
    private ExecutorService executorService;

    /** Registry that {@link #register()} adds this transformer to; may be {@code null}. */
    private ContentTransformerRegistry registry;
    /**
     * When {@code true}, {@link #register()} adds this instance as a selectable transformer;
     * otherwise it is only added as a component transformer.
     */
    private boolean registerTransformer;
    /**
     * When {@code true}, a failed transformation whose detected mimetype differs from the
     * declared one is retried with a transformer looked up for the detected mimetype.
     */
    private boolean retryTransformOnDifferentMimeType;
    /** When {@code true}, top-level transformations verify the declared source mimetype first. */
    private boolean strictMimeTypeCheck;
    // Package-visible; assigned by setMetadataExtracterConfig(...) and not read in this class.
    MetadataExtracterConfig metadataExtracterConfig;
    /**
     * A flag that indicates that the transformer should be started in its own Thread so
     * that it may be interrupted rather than using the timeout in the Reader.
     * Need only be set for transformers that read their source data quickly but then
     * take a long time to process the data (such as {@link PoiOOXMLContentTransformer}).
     */
    private Boolean useTimeoutThread = false;

    /**
     * Extra time (in milliseconds) added to the timeout when using a Thread for the
     * transformation so that a timeout from the Reader has a chance to happen first.
     */
    private long additionalThreadTimout = 2000;

    /**
     * Per-thread nesting level of {@code transform(...)} calls. It is incremented on entry to
     * and decremented on exit from the transform method, so a value of 1 identifies the
     * outermost transformation running on the current thread.
     */
    private static ThreadLocal<Integer> depth = new ThreadLocal<Integer>() {
        @Override
        protected Integer initialValue() {
            // No transformation is in progress on a thread that has not called transform yet.
            return 0;
        }
    };

    /**
     * Creates the transformer without any configuration; collaborators are supplied
     * afterwards through the setters.
     * <p>
     * All transformers start with an average transformation time of 0.0 ms,
     * unless there is an Alfresco global property {@code <beanName>.time}.
     * May also be set for given combinations of source and target mimetypes.
     */
    protected AbstractContentTransformer2() {
        super();
    }

    /**
     * Supplies the registry that {@link #register()} will add this transformer to.
     *
     * @param registry the transformer registry to auto-register with
     */
    public void setRegistry(final ContentTransformerRegistry registry) {
        this.registry = registry;
    }

    /**
     * Stores the metadata extracter configuration on this transformer.
     *
     * @param metadataExtracterConfig the metadata extracter config
     */
    public void setMetadataExtracterConfig(final MetadataExtracterConfig metadataExtracterConfig) {
        this.metadataExtracterConfig = metadataExtracterConfig;
    }

    /**
     * Controls how {@link #register()} adds this instance to the registry.
     *
     * @param registerTransformer {@code true} to make the transformer available for selection.
     *        If {@code false} this indicates that the transformer may only be
     *        used as part of another transformer.
     */
    public void setRegisterTransformer(final boolean registerTransformer) {
        this.registerTransformer = registerTransformer;
    }

    /**
     * @return the simple (unqualified) name of the concrete transformer class
     */
    @Override
    public String toString() {
        return getClass().getSimpleName();
    }

    /**
     * Performs the superclass registration and then adds this instance to the
     * {@link #setRegistry(ContentTransformerRegistry) registry}, if one has been set.
     * <p>
     * Depending on {@link #setRegisterTransformer(boolean)} the instance is added either as a
     * selectable transformer or only as a component of other transformers.
     *
     * THIS IS A CUSTOM SPRING INIT METHOD
     */
    public void register() {
        super.register();

        // Without a registry there is nothing to add ourselves to.
        if (registry == null) {
            logger.warn("Property 'registry' has not been set.  Ignoring auto-registration: \n" + "   transformer: "
                    + this.getName());
            return;
        }

        // Component-only transformers are not offered for direct selection.
        if (!registerTransformer) {
            registry.addComponentTransformer(this);
            logger.debug("Property 'registerTransformer' have not been set, so transformer (" + this.getName()
                    + ") may only be used as a component of a complex transformer.");
            return;
        }

        registry.addTransformer(this);
    }

    /**
     * Verifies that this transformer can handle the given reader/writer combination and
     * fails with a descriptive exception when it cannot.
     *
     * @param reader    content reader
     * @param writer    content writer
     * @param options   transformation options
     * @throws AlfrescoRuntimeException if the transformation isn't supported
     */
    protected void checkTransformable(ContentReader reader, ContentWriter writer, TransformationOptions options) {
        String sourceMimetype = getMimetype(reader);
        String targetMimetype = getMimetype(writer);
        long sourceSize = reader.getSize();

        if (isTransformable(sourceMimetype, sourceSize, targetMimetype, options)) {
            // it all checks out OK
            return;
        }

        // This method is only called once a transformer has been selected, so it should be able to
        // handle the mimetypes but might not be able to handle all the limits as it might be part
        // of a complex (compound) transformer. So report the max size if set.
        long maxSourceSizeKBytes = getMaxSourceSizeKBytes(sourceMimetype, targetMimetype, options);
        boolean sizeOkay = maxSourceSizeKBytes < 0
                || (maxSourceSizeKBytes > 0 && sourceSize <= maxSourceSizeKBytes * 1024);

        StringBuilder message = new StringBuilder("Unsupported transformation: ");
        message.append(getBeanName()).append(' ').append(sourceMimetype).append(" to ").append(targetMimetype)
                .append(' ');
        if (!sizeOkay) {
            // Only mention the sizes when the size limit is what ruled the transformation out.
            message.append(transformerDebug.fileSize(sourceSize)).append(" > ")
                    .append(transformerDebug.fileSize(maxSourceSizeKBytes * 1024));
        }

        AlfrescoRuntimeException e = new UnsupportedTransformationException(message.toString());
        throw transformerDebug.setCause(e);
    }

    /**
     * Method to be implemented by subclasses wishing to make use of the common infrastructural code
     * provided by this class.
     * <p>
     * It is invoked by {@link #transform(ContentReader, ContentWriter, TransformationOptions)}
     * either directly on the calling thread or, when the timeout thread is in use, on a thread
     * of the {@link #getExecutorService() executor service}.
     * 
     * @param reader the source of the content to transform
     * @param writer the target to which to write the transformed content
     * @param options the options to use when performing the transformation.  When called from
     *      this class the options will never be null.
     * @throws Exception exceptions will be handled by this class - subclasses can throw anything
     */
    protected abstract void transformInternal(ContentReader reader, ContentWriter writer,
            TransformationOptions options) throws Exception;

    /**
     * Transforms the content using a freshly created, default set of transformation options.
     *
     * @see #transform(ContentReader, ContentWriter, Map)
     * @see #transformInternal(ContentReader, ContentWriter, TransformationOptions)
     */
    public final void transform(ContentReader reader, ContentWriter writer) throws ContentIOException {
        TransformationOptions defaultOptions = new TransformationOptions();
        transform(reader, writer, defaultOptions);
    }

    /**
     * Performs the transformation, wrapping the subclass's
     * {@link #transformInternal(ContentReader, ContentWriter, TransformationOptions)} with the
     * common checks, timing and error handling:
     * <ul>
     *   <li>optionally verifies the declared source mimetype (top-level calls only);</li>
     *   <li>checks that the transformation is supported and passes limits to the reader;</li>
     *   <li>runs the transformation, either directly or on an executor thread bounded by the
     *       configured timeout plus the additional thread timeout;</li>
     *   <li>records the elapsed time on success, or an error (with the elapsed time) on failure;</li>
     *   <li>on failure, optionally retries with a transformer selected for the detected
     *       mimetype.</li>
     * </ul>
     *
     * @param reader the source of the content to transform
     * @param writer the target to which to write the transformed content
     * @param options the transformation options; {@code null} is replaced by default options
     * @throws ContentIOException if the transformation fails for any reason other than a
     *      transient failure or an unsupported transformation, both of which are rethrown as is
     * @see org.alfresco.repo.content.transform.ContentTransformer#transform(org.alfresco.service.cmr.repository.ContentReader, org.alfresco.service.cmr.repository.ContentWriter, org.alfresco.service.cmr.repository.TransformationOptions)
     */
    public final void transform(ContentReader reader, ContentWriter writer, TransformationOptions options)
            throws ContentIOException {
        try {
            // Track nesting so that only the outermost call on this thread counts as top level
            depth.set(depth.get() + 1);

            // begin timing
            long before = System.currentTimeMillis();

            String sourceMimetype = reader.getMimetype();
            String targetMimetype = writer.getMimetype();

            // check options map
            if (options == null) {
                options = new TransformationOptions();
            }

            try {
                if (transformerDebug.isEnabled()) {
                    transformerDebug.pushTransform(this, reader.getContentUrl(), sourceMimetype, targetMimetype,
                            reader.getSize(), options);
                }

                // MNT-16381: check the mimetype of the file supplied by the user
                // matches the sourceMimetype of the reader. Intermediate files are
                // not checked.
                strictMimetypeCheck(reader, options, sourceMimetype);

                // Check the transformability
                checkTransformable(reader, writer, options);

                // Pass on any limits to the reader
                setReaderLimits(reader, writer, options);

                // Transform
                // MNT-12238: CLONE - CLONE - Upload of PPTX causes very high memory usage leading to system instability
                // Limiting transformation up to configured amount of milliseconds to avoid very high RAM consumption
                // and OOM during transforming problematic documents
                TransformationOptionLimits limits = getLimits(reader.getMimetype(), writer.getMimetype(), options);

                // Only read the timeout once we know there are limits; missing limits are treated
                // the same as "no timeout" (-1) so the transformation runs on the calling thread.
                long timeoutMs = (null == limits) ? -1 : limits.getTimeoutMs();
                if (!useTimeoutThread || (-1 == timeoutMs)) {
                    transformInternal(reader, writer, options);
                } else {
                    Future<?> submittedTask = null;
                    StreamAwareContentReaderProxy proxiedReader = new StreamAwareContentReaderProxy(reader);
                    StreamAwareContentWriterProxy proxiedWriter = new StreamAwareContentWriterProxy(writer);

                    try {
                        submittedTask = getExecutorService()
                                .submit(new TransformInternalCallable(proxiedReader, proxiedWriter, options));
                        submittedTask.get(timeoutMs + additionalThreadTimout, TimeUnit.MILLISECONDS);
                    } catch (TimeoutException e) {
                        releaseResources(submittedTask, proxiedReader, proxiedWriter);
                        // Keep the original exception as the cause so its stack trace is not lost
                        TimeoutException timeout = new TimeoutException(
                                "Transformation failed due to timeout limit");
                        timeout.initCause(e);
                        throw timeout;
                    } catch (InterruptedException e) {
                        releaseResources(submittedTask, proxiedReader, proxiedWriter);
                        // The interruption is reported through the exception handling below;
                        // the original exception is kept as the cause.
                        InterruptedException interrupted = new InterruptedException(
                                "Transformation failed, because the thread of the transformation was interrupted");
                        interrupted.initCause(e);
                        throw interrupted;
                    } catch (ExecutionException e) {
                        // Unwrap to the exception actually thrown by transformInternal
                        Throwable cause = e.getCause();
                        if (cause instanceof TransformInternalCallableException) {
                            cause = ((TransformInternalCallableException) cause).getCause();
                        }

                        if (cause == null) {
                            // Nothing to unwrap: report the ExecutionException itself rather
                            // than failing with a NullPointerException from "throw null".
                            throw e;
                        }
                        throw cause;
                    }
                }

                // record time
                long after = System.currentTimeMillis();
                recordTime(sourceMimetype, targetMimetype, after - before);
            } catch (ContentServiceTransientException cste) {
                // A transient failure has occurred within the content transformer.
                // This should not be interpreted as a failure and therefore we should not
                // update the transformer's average time.
                if (logger.isDebugEnabled()) {
                    logger.debug("Transformation has been transiently declined: \n" + "   reader: " + reader + "\n"
                            + "   writer: " + writer + "\n" + "   options: " + options + "\n" + "   transformer: "
                            + this);
                }
                // the finally block below will still perform tidyup. Otherwise we're done.
                // We rethrow the exception
                throw cste;
            } catch (UnsupportedTransformationException e) {
                // Don't record an error or even the time, as this is normal in compound transformations.
                transformerDebug.debug("          Failed", e);
                throw e;
            } catch (Throwable e) {
                // Make sure that this transformation gets set back i.t.o. time taken.
                // This will ensure that transformers that compete for the same transformation
                // will be prejudiced against transformers that tend to fail
                long after = System.currentTimeMillis();
                recordError(sourceMimetype, targetMimetype, after - before);

                // Ask Tika to detect the document, and report back on if
                //  the current mime type is plausible
                String differentType = getMimetypeService().getMimetypeIfNotMatches(reader.getReader());

                // Report the error
                if (differentType == null) {
                    transformerDebug.debug("          Failed", e);
                    throw new ContentIOException("Content conversion failed: \n" + "   reader: " + reader + "\n"
                            + "   writer: " + writer + "\n" + "   options: " + options.toString(false) + "\n"
                            + "   limits: " + getLimits(reader, writer, options), e);
                } else {
                    transformerDebug.debug("          Failed: Mime type was '" + differentType + "'", e);

                    if (retryTransformOnDifferentMimeType) {
                        // MNT-11015 fix.
                        // Set a new reader to refresh the input stream.
                        reader = reader.getReader();
                        // set the actual file MIME type detected by Tika for content reader
                        reader.setMimetype(differentType);

                        // Get correct transformer according actual file MIME type and try to transform file with
                        // actual transformer
                        ContentTransformer transformer = this.registry.getTransformer(differentType,
                                reader.getSize(), targetMimetype, options);
                        if (null != transformer) {
                            transformer.transform(reader, writer, options);
                        } else {
                            transformerDebug.debug("          Failed", e);
                            throw new ContentIOException("Content conversion failed: \n" + "   reader: " + reader
                                    + "\n" + "   writer: " + writer + "\n" + "   options: "
                                    + options.toString(false) + "\n" + "   limits: "
                                    + getLimits(reader, writer, options) + "\n" + "   claimed mime type: "
                                    + reader.getMimetype() + "\n" + "   detected mime type: " + differentType + "\n"
                                    + "   transformer not found" + "\n", e);
                        }
                    } else {
                        throw new ContentIOException("Content conversion failed: \n" + "   reader: " + reader + "\n"
                                + "   writer: " + writer + "\n" + "   options: " + options.toString(false) + "\n"
                                + "   limits: " + getLimits(reader, writer, options) + "\n"
                                + "   claimed mime type: " + reader.getMimetype() + "\n" + "   detected mime type: "
                                + differentType, e);
                    }
                }
            } finally {
                transformerDebug.popTransform();

                // check that the reader and writer are both closed
                if (reader.isChannelOpen()) {
                    logger.error("Content reader not closed by transformer: \n" + "   reader: " + reader + "\n"
                            + "   transformer: " + this);
                }
                if (writer.isChannelOpen()) {
                    logger.error("Content writer not closed by transformer: \n" + "   writer: " + writer + "\n"
                            + "   transformer: " + this);
                }
            }

            // done
            if (logger.isDebugEnabled()) {
                logger.debug("Completed transformation: \n" + "   reader: " + reader + "\n" + "   writer: " + writer
                        + "\n" + "   options: " + options + "\n" + "   transformer: " + this);
            }
        } finally {
            // Always undo the nesting increment, whatever the outcome
            depth.set(depth.get() - 1);
        }
    }

    /**
     * For top-level transformations only (nesting depth of 1) and only when the strict
     * mimetype check is switched on, asks the mimetype service for the detected type of the
     * content and rejects the transformation if the transformer config does not accept the
     * combination of declared and detected mimetype.
     *
     * @param reader the source content; a fresh reader is obtained from it for detection
     * @param options the transformation options, used to obtain the file name for the message
     * @param sourceMimetype the declared source mimetype
     * @throws UnsupportedTransformationException if the declared mimetype is not acceptable
     */
    private void strictMimetypeCheck(ContentReader reader, TransformationOptions options, String sourceMimetype)
            throws UnsupportedTransformationException {
        // Nothing to do when the check is disabled or this is a nested (intermediate) transform.
        if (!strictMimeTypeCheck || depth.get() != 1) {
            return;
        }

        String differentType = getMimetypeService().getMimetypeIfNotMatches(reader.getReader());
        if (transformerConfig.strictMimetypeCheck(sourceMimetype, differentType)) {
            return;
        }

        String fileName = transformerDebug.getFileName(options, true, 0);
        String readerSourceMimetype = reader.getMimetype();
        StringBuilder text = new StringBuilder();
        text.append("Transformation of (").append(fileName);
        text.append(") has not taken place because the declared mimetype (").append(readerSourceMimetype);
        text.append(") does not match the detected mimetype (").append(differentType).append(").");
        String message = text.toString();

        logger.warn(message);
        throw new UnsupportedTransformationException(message);
    }

    /**
     * Cancels <code>task</code> (interrupting it if it is running) and releases both content
     * accessors. Any argument that is {@code null} is skipped.
     *
     * @param task - {@link Future} task instance which specifies a transformation action
     * @param proxiedReader - {@link AbstractStreamAwareProxy} instance which represents channel closing mechanism for content reader
     * @param proxiedWriter - {@link AbstractStreamAwareProxy} instance which represents channel closing mechanism for content writer
     */
    private void releaseResources(Future<?> task, AbstractStreamAwareProxy proxiedReader,
            AbstractStreamAwareProxy proxiedWriter) {
        // Stop the transformation first, then let go of the reader and the writer in that order.
        if (task != null) {
            task.cancel(true);
        }
        if (proxiedReader != null) {
            proxiedReader.release();
        }
        if (proxiedWriter != null) {
            proxiedWriter.release();
        }
    }

    /**
     * Transforms the content using options supplied as a plain map, which is wrapped in a
     * {@link TransformationOptions} instance before delegating.
     *
     * @param reader the source of the content to transform
     * @param writer the target to which to write the transformed content
     * @param options the options used to construct the {@link TransformationOptions}
     * @throws ContentIOException if the transformation fails
     */
    public final void transform(ContentReader reader, ContentWriter writer, Map<String, Object> options)
            throws ContentIOException {
        TransformationOptions wrappedOptions = new TransformationOptions(options);
        this.transform(reader, writer, wrappedOptions);
    }

    /**
     * Reads the average time from this transformer's statistics, without restricting them
     * to a particular source or target mimetype.
     *
     * @return Returns the calculated running average of the current transformations
     */
    public synchronized long getTransformationTime() {
        return transformerConfig
                .getStatistics(this, null, null, true)
                .getAverageTime();
    }

    /**
     * Reads the average time from this transformer's statistics for the given mimetype pair.
     *
     * @param sourceMimetype the source mimetype of the statistics to read
     * @param targetMimetype the target mimetype of the statistics to read
     * @return Returns the calculated running average of the current transformations
     */
    public synchronized long getTransformationTime(String sourceMimetype, String targetMimetype) {
        return transformerConfig
                .getStatistics(this, sourceMimetype, targetMimetype, true)
                .getAverageTime();
    }

    /**
     * Records a transformation time against the "any" source and target mimetypes.
     *
     * @param transformationTime the time it took to perform the transformation.
     * @deprecated use method with mimetypes.
     */
    protected final synchronized void recordTime(long transformationTime) {
        final String anyMimetype = TransformerConfig.ANY;
        recordTime(anyMimetype, anyMimetype, transformationTime);
    }

    /**
     * Records and updates the average transformation time for this transformer.
     * <p>
     * The time is always recorded against this transformer's statistics. When the call is
     * made from the outermost transformation on the current thread it is also recorded
     * against the statistics obtained with a {@code null} transformer.
     * <p>
     * Subclasses should call this after every transformation in order to keep
     * the running average of the transformation times up to date.
     * <p>
     * This method is synchronized.  The time spent in this method is negligible
     * so the impact will be minor.
     *
     * @param sourceMimetype String
     * @param targetMimetype String
     * @param transformationTime the time it took to perform the transformation.
     */
    protected final synchronized void recordTime(String sourceMimetype, String targetMimetype,
            long transformationTime) {
        transformerConfig
                .getStatistics(this, sourceMimetype, targetMimetype, true)
                .recordTime(transformationTime);

        final boolean outermostTransform = (1 == depth.get());
        if (outermostTransform) {
            transformerConfig
                    .getStatistics(null, sourceMimetype, targetMimetype, true)
                    .recordTime(transformationTime);
        }
    }

    /**
     * Gets the <code>ExecutorService</code> to be used for timeout-aware transformation.
     * <p>
     * If no <code>ExecutorService</code> has been set, a default of
     * <code>Executors.newCachedThreadPool()</code> is created on the first call and reused
     * afterwards.
     * <p>
     * The method is synchronized so that concurrent first calls cannot each create (and then
     * leak) their own thread pool.
     * 
     * @return the defined or default <code>ExecutorService</code>
     */
    protected synchronized ExecutorService getExecutorService() {
        if (null == executorService) {
            executorService = Executors.newCachedThreadPool();
        }

        return executorService;
    }

    /**
     * Supplies the <code>ExecutorService</code> on which timeout-aware transformations run.
     *
     * @param executorService - {@link ExecutorService} instance for timeouts
     */
    public void setExecutorService(final ExecutorService executorService) {
        this.executorService = executorService;
    }

    /**
     * {@link Callable} that runs the enclosing transformer's
     * {@link AbstractContentTransformer2#transformInternal(ContentReader, ContentWriter, TransformationOptions)}
     * so that the transformation can be submitted to an executor and bounded by a timeout.
     * Anything thrown by the transformation is wrapped in a
     * {@link TransformInternalCallableException}.
     */
    private class TransformInternalCallable implements Callable<Void> {
        private final ContentReader source;
        private final ContentWriter target;
        private final TransformationOptions transformationOptions;

        public TransformInternalCallable(ContentReader reader, ContentWriter writer,
                TransformationOptions options) {
            source = reader;
            target = writer;
            transformationOptions = options;
        }

        @Override
        public Void call() throws Exception {
            try {
                AbstractContentTransformer2.this.transformInternal(source, target, transformationOptions);
            } catch (Throwable failure) {
                // Wrap everything, including errors, so the submitter can unwrap the real cause.
                throw new TransformInternalCallableException(failure);
            }
            return null;
        }
    }

    /**
     * Exception wrapper to handle any {@link Throwable} from {@link AbstractContentTransformer2#transformInternal(ContentReader, ContentWriter, TransformationOptions)}
     */
    private class TransformInternalCallableException extends Exception {
        private static final long serialVersionUID = 7740560508772740658L;

        public TransformInternalCallableException(Throwable cause) {
            super(cause);
        }
    }

    /**
     * Switches the timeout-thread mechanism on or off. Passing {@code null} switches it on.
     *
     * @param useTimeoutThread - {@link Boolean} value which specifies timeout limiting mechanism for the current transformer
     * @see AbstractContentTransformer2#useTimeoutThread
     */
    public void setUseTimeoutThread(Boolean useTimeoutThread) {
        this.useTimeoutThread = (useTimeoutThread == null) ? Boolean.TRUE : useTimeoutThread;
    }

    /**
     * Sets the extra time that is added to the configured timeout when the transformation
     * runs on a timeout thread.
     *
     * @param additionalThreadTimout the extra time, added to a timeout expressed in milliseconds
     */
    public void setAdditionalThreadTimout(final long additionalThreadTimout) {
        this.additionalThreadTimout = additionalThreadTimout;
    }

    /**
     * @return the current value of the timeout-thread flag
     * @see #setUseTimeoutThread(Boolean)
     */
    public Boolean isTransformationLimitedInternally() {
        return this.useTimeoutThread;
    }

    /**
     * Records an error and updates the average time as if the transformation took a
     * long time, so that it is less likely to be called again.
     * <p>
     * The error is always recorded against this transformer's statistics. When the call is
     * made from the outermost transformation on the current thread it is also recorded
     * against the statistics obtained with a {@code null} transformer.
     *
     * @param sourceMimetype String
     * @param targetMimetype String
     * @param transformationTime the time it took to perform the transformation.
     */
    protected final synchronized void recordError(String sourceMimetype, String targetMimetype,
            long transformationTime) {
        transformerConfig
                .getStatistics(this, sourceMimetype, targetMimetype, true)
                .recordError(transformationTime);

        final boolean outermostTransform = (1 == depth.get());
        if (outermostTransform) {
            transformerConfig
                    .getStatistics(null, sourceMimetype, targetMimetype, true)
                    .recordError(transformationTime);
        }
    }

    /**
     * @return the retry-on-different-mimetype flag, boxed as a {@link Boolean}
     * @see #setRetryTransformOnDifferentMimeType(boolean)
     */
    public Object getRetryTransformOnDifferentMimeType() {
        return Boolean.valueOf(retryTransformOnDifferentMimeType);
    }

    /**
     * @param retryTransformOnDifferentMimeType {@code true} if a failed transformation should
     *        be retried with a transformer for the detected mimetype when that differs from
     *        the declared one
     */
    public void setRetryTransformOnDifferentMimeType(final boolean retryTransformOnDifferentMimeType) {
        this.retryTransformOnDifferentMimeType = retryTransformOnDifferentMimeType;
    }

    /**
     * @return {@code true} if the declared source mimetype is verified before top-level
     *         transformations
     */
    public boolean getStrictMimeTypeCheck() {
        return this.strictMimeTypeCheck;
    }

    /**
     * @param strictMimeTypeCheck {@code true} to verify the declared source mimetype before
     *        top-level transformations
     */
    public void setStrictMimeTypeCheck(final boolean strictMimeTypeCheck) {
        this.strictMimeTypeCheck = strictMimeTypeCheck;
    }
}