net.padaf.preflight.helpers.TrailerValidationHelper.java Source code

Java tutorial

Introduction

Here is the source code for net.padaf.preflight.helpers.TrailerValidationHelper.java

Source

/*******************************************************************************
 * Copyright 2010 Atos Worldline SAS
 * 
 * Licensed by Atos Worldline SAS under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * Atos Worldline SAS licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
/**
 * 
 */
package net.padaf.preflight.helpers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import net.padaf.preflight.DocumentHandler;
import net.padaf.preflight.ValidationConstants;
import net.padaf.preflight.ValidationException;
import net.padaf.preflight.ValidationResult;
import net.padaf.preflight.ValidatorConfig;
import net.padaf.preflight.ValidationResult.ValidationError;
import net.padaf.preflight.utils.COSUtils;
import net.padaf.preflight.utils.PdfElementParser;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdmodel.PDDocument;

/**
 * Validation helper that checks the trailer dictionaries of a PDF document.
 * <p>
 * For a linearized PDF the linearized dictionary and the first trailer are
 * checked, and the ID of the first trailer is compared with the ID of the last
 * trailer. For any other PDF only the last trailer is checked.
 * 
 * @author eric
 */
public class TrailerValidationHelper extends AbstractValidationHelper {

    /**
     * Creates the helper with the given validator configuration.
     * 
     * @param cfg
     *          the validator configuration, forwarded to the super class
     * @throws ValidationException
     *           propagated from the super class constructor
     */
    public TrailerValidationHelper(ValidatorConfig cfg) throws ValidationException {
        super(cfg);
    }

    /**
     * Validates the trailer(s) of the document held by the handler.
     * 
     * @param handler
     *          gives access to the parsed document and to the raw trailers
     *          extracted from the file
     * @return the list of validation errors found, empty if there is none
     * @throws ValidationException
     *           declared by the overridden method
     */
    @Override
    public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException {
        List<ValidationError> result = new ArrayList<ValidationError>();
        PDDocument pdfDoc = handler.getDocument();

        COSDictionary linearizedDict = isLinearizedPdf(pdfDoc);
        if (linearizedDict != null) {
            // it is a linearized PDF, check the linearized dictionary
            checkLinearizedDictionnary(linearizedDict, result);
        }

        List<String> lTrailers = handler.getPdfExtractor().getAllTrailers();
        if (lTrailers == null || lTrailers.isEmpty()) {
            // Without any extracted trailer there is nothing to parse; report it
            // as a validation error instead of failing on an out-of-range index.
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "The trailer dictionary is missing"));
            return result;
        }

        String lastTrailer = lTrailers.get(lTrailers.size() - 1);
        if (linearizedDict != null) {
            // if the pdf is a linearized pdf, the first trailer must be checked
            // and it must have the same ID as the last trailer.
            validateLinearizedTrailers(pdfDoc.getDocument(), lTrailers.get(0), lastTrailer, result);
        } else {
            // If the PDF isn't a linearized one, only the last trailer must be
            // checked
            validateLastTrailer(pdfDoc.getDocument(), lastTrailer, result);
        }
        return result;
    }

    /**
     * Parses the first and the last trailer of a linearized PDF, checks the
     * first one and verifies that both carry the same ID.
     * 
     * @param doc
     *          the document being validated
     * @param firstTrailer
     *          raw text of the first trailer
     * @param lastTrailer
     *          raw text of the last trailer
     * @param result
     *          list receiving the validation errors
     * @throws ValidationException
     *           declared for compatibility with the calling method
     */
    private void validateLinearizedTrailers(COSDocument doc, String firstTrailer, String lastTrailer,
            List<ValidationError> result) throws ValidationException {
        COSDocument cd = null;
        try {
            cd = new COSDocument();
            COSDictionary first = new PdfElementParser(cd, firstTrailer.getBytes()).parseAsDictionary();
            COSDictionary last = new PdfElementParser(cd, lastTrailer.getBytes()).parseAsDictionary();

            checkMainTrailer(doc, first, result);
            if (!compareIds(first, last, doc)) {
                result.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                        "ID is different in the first and the last trailer"));
            }
        } catch (IOException e) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "Unable to parse trailers of the linearized PDF"));
        } finally {
            // cd is still null when the temporary document could not be created
            COSUtils.closeDocumentQuietly(cd);
        }
    }

    /**
     * Parses the last trailer of a non linearized PDF and checks it.
     * 
     * @param doc
     *          the document being validated
     * @param lastTrailer
     *          raw text of the last trailer
     * @param result
     *          list receiving the validation errors
     * @throws ValidationException
     *           declared for compatibility with the calling method
     */
    private void validateLastTrailer(COSDocument doc, String lastTrailer, List<ValidationError> result)
            throws ValidationException {
        COSDocument cd = null;
        try {
            cd = new COSDocument();
            COSDictionary trailer = new PdfElementParser(cd, lastTrailer.getBytes()).parseAsDictionary();
            checkMainTrailer(doc, trailer, result);
        } catch (IOException e) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "The trailer dictionary is missing"));
        } finally {
            // Same quiet close as the linearized case: calling cd.close()
            // directly here would dereference cd while it may still be null.
            COSUtils.closeDocumentQuietly(cd);
        }
    }

    /**
     * Compares the ID entries of two trailer dictionaries.
     * <p>
     * The result is true when both dictionaries have an ID array and every
     * string of the first array is also present in the last array. A missing
     * ID, an ID that is not an array, or an array element that is not a string
     * makes the comparison fail.
     * 
     * @param first
     *          the first trailer dictionary
     * @param last
     *          the last trailer dictionary
     * @param doc
     *          the document, passed to COSUtils when converting the ID entries
     *          to arrays
     * @return true if the IDs are considered identical, false otherwise
     */
    protected boolean compareIds(COSDictionary first, COSDictionary last, COSDocument doc) {
        COSName idKey = COSName.getPDFName(TRAILER_DICTIONARY_KEY_ID);
        COSBase idFirst = first.getItem(idKey);
        COSBase idLast = last.getItem(idKey);

        if (idFirst == null || idLast == null) {
            return false;
        }

        // ---- cast two COSBase to COSArray.
        COSArray af = COSUtils.getAsArray(idFirst, doc);
        COSArray al = COSUtils.getAsArray(idLast, doc);

        // ---- if one COSArray is null, the PDF/A isn't valid
        if ((af == null) || (al == null)) {
            return false;
        }

        // ---- compare both arrays
        List<?> lastIds = al.toList();
        for (Object of : af.toList()) {
            // ---- according to PDF Reference 1-4, ID is an array containing two
            // strings; anything else is reported as a mismatch rather than
            // failing on a cast.
            if (!(of instanceof COSString)) {
                return false;
            }
            String expected = ((COSString) of).getString();

            boolean found = false;
            for (Object ol : lastIds) {
                if (!(ol instanceof COSString)) {
                    return false;
                }
                if (((COSString) ol).getString().equals(expected)) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks the presence and the type of the trailer dictionary entries.
     * <p>
     * ID and Size and Root are mandatory, Encrypt is forbidden, Prev and Info
     * are optional. Every entry that is present must have the expected type
     * (ID: array, Size and Prev: integer, Root and Info: dictionary). If a key
     * of the dictionary is not a COSName, a single error is added and the
     * remaining checks are skipped.
     * 
     * @param doc
     *          the document, passed to COSUtils for the type checks
     * @param trailer
     *          the trailer dictionary to check
     * @param lErrors
     *          list receiving the validation errors
     */
    protected void checkMainTrailer(COSDocument doc, COSDictionary trailer, List<ValidationError> lErrors) {
        boolean id = false;
        boolean root = false;
        boolean size = false;
        boolean prev = false;
        boolean info = false;
        boolean encrypt = false;

        for (Object key : trailer.keySet()) {
            if (!(key instanceof COSName)) {
                lErrors.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_DICTIONARY_KEY_INVALID,
                        "Invalid key in The trailer dictionary"));
                return;
            }

            String cosName = ((COSName) key).getName();
            if (cosName.equals(TRAILER_DICTIONARY_KEY_ENCRYPT)) {
                encrypt = true;
            } else if (cosName.equals(TRAILER_DICTIONARY_KEY_SIZE)) {
                size = true;
            } else if (cosName.equals(TRAILER_DICTIONARY_KEY_PREV)) {
                prev = true;
            } else if (cosName.equals(TRAILER_DICTIONARY_KEY_ROOT)) {
                root = true;
            } else if (cosName.equals(TRAILER_DICTIONARY_KEY_INFO)) {
                info = true;
            } else if (cosName.equals(TRAILER_DICTIONARY_KEY_ID)) {
                id = true;
            }
        }

        // ---- PDF/A Trailer dictionary must contain the ID key
        if (!id) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_MISSING_ID,
                    "The trailer dictionary doesn't contain ID"));
        } else if (!COSUtils.isArray(trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ID)), doc)) {
            lErrors.add(invalidTrailerType("The trailer dictionary contains an id but it isn't an array"));
        }

        // ---- PDF/A Trailer dictionary mustn't contain the Encrypt key
        if (encrypt) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_ENCRYPT,
                    "The trailer dictionary contains Encrypt"));
        }

        // ---- PDF Trailer dictionary must contain the Size key
        if (!size) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_MISSING_SIZE,
                    "The trailer dictionary doesn't contain Size"));
        } else if (!COSUtils.isInteger(trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_SIZE)), doc)) {
            lErrors.add(invalidTrailerType("The trailer dictionary contains a size but it isn't an integer"));
        }

        // ---- PDF Trailer dictionary must contain the Root key
        if (!root) {
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_MISSING_ROOT,
                    "The trailer dictionary doesn't contain Root"));
        } else if (!COSUtils.isDictionary(trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_ROOT)), doc)) {
            lErrors.add(invalidTrailerType("The trailer dictionary contains a root but it isn't a dictionary"));
        }

        // ---- PDF Trailer dictionary may contain the Prev key
        if (prev && !COSUtils.isInteger(trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_PREV)), doc)) {
            lErrors.add(invalidTrailerType("The trailer dictionary contains a prev but it isn't an integer"));
        }

        // ---- PDF Trailer dictionary may contain the Info key
        if (info && !COSUtils.isDictionary(trailer.getItem(COSName.getPDFName(TRAILER_DICTIONARY_KEY_INFO)), doc)) {
            lErrors.add(invalidTrailerType("The trailer dictionary contains an info but it isn't a dictionary"));
        }
    }

    /**
     * Builds the error reported when a trailer entry has an unexpected type.
     * 
     * @param message
     *          the error details
     * @return a validation error with the ERROR_SYNTAX_TRAILER_TYPE_INVALID code
     */
    private static ValidationError invalidTrailerType(String message) {
        return new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER_TYPE_INVALID,
                message);
    }

    /**
     * Looks for the linearized dictionary of the document.
     * <p>
     * According to the PDF Reference, a linearized PDF contains a dictionary as
     * first object (linearized dictionary) and only this one in the first
     * section. This method returns the first dictionary object of the document
     * that has the Linearized key.
     * 
     * @param document
     *          the document to inspect
     * @return the linearized dictionary, or null if the document has none
     */
    protected COSDictionary isLinearizedPdf(PDDocument document) {
        COSName linearizedKey = COSName.getPDFName(DICTIONARY_KEY_LINEARIZED);

        // ---- Get Ref to obj
        COSDocument cDoc = document.getDocument();
        List<?> lObj = cDoc.getObjects();
        for (Object object : lObj) {
            COSBase curObj = ((COSObject) object).getObject();
            if (curObj instanceof COSDictionary && ((COSDictionary) curObj).keySet().contains(linearizedKey)) {
                return (COSDictionary) curObj;
            }
        }
        return null;
    }

    /**
     * Checks that the mandatory keys of the linearized dictionary (L, H, O, E,
     * N and T) are all present.
     * <p>
     * If a key of the dictionary is not a COSName, a single error is added and
     * the presence check is skipped.
     * 
     * @param linearizedDict
     *          the linearized dictionary to check
     * @param lErrors
     *          list receiving the validation errors
     */
    protected void checkLinearizedDictionnary(COSDictionary linearizedDict, List<ValidationError> lErrors) {
        // ---- Linearized dictionary must contain the lhoent keys
        boolean l = false;
        boolean h = false;
        boolean o = false;
        boolean e = false;
        boolean n = false;
        boolean t = false;

        for (Object key : linearizedDict.keySet()) {
            if (!(key instanceof COSName)) {
                lErrors.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_DICTIONARY_KEY_INVALID,
                        "Invalid key in The Linearized dictionary"));
                return;
            }

            String cosName = ((COSName) key).getName();
            if (cosName.equals(DICTIONARY_KEY_LINEARIZED_L)) {
                l = true;
            } else if (cosName.equals(DICTIONARY_KEY_LINEARIZED_H)) {
                h = true;
            } else if (cosName.equals(DICTIONARY_KEY_LINEARIZED_O)) {
                o = true;
            } else if (cosName.equals(DICTIONARY_KEY_LINEARIZED_E)) {
                e = true;
            } else if (cosName.equals(DICTIONARY_KEY_LINEARIZED_N)) {
                n = true;
            } else if (cosName.equals(DICTIONARY_KEY_LINEARIZED_T)) {
                t = true;
            }
        }

        if (!(l && h && o && e && t && n)) {
            // NOTE(review): this branch means a mandatory key is missing, yet the
            // message says "Invalid key". The text is kept unchanged because
            // callers may rely on it - confirm before rewording.
            lErrors.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_DICT_INVALID,
                    "Invalid key in The Linearized dictionary"));
        }
    }
}