com.formkiq.core.service.generator.pdfbox.PdfEditorServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.formkiq.core.service.generator.pdfbox.PdfEditorServiceImpl.java

Source

/*
 * Copyright (C) 2017 FormKiQ Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.formkiq.core.service.generator.pdfbox;

import static com.formkiq.core.form.FormFinder.findField;
import static com.formkiq.core.form.FormFinder.findFieldsByType;
import static com.formkiq.core.form.FormFinder.findWorkflowOutputDocument;
import static com.formkiq.core.form.FormFinder.hasFieldType;
import static com.formkiq.core.form.FormTransformer.removeFields;
import static com.formkiq.core.form.bean.ObjectBuilder.buildFormJSONSection;
import static com.formkiq.core.form.bean.ObjectBuilder.buildWorkflowOuputFormField;
import static com.formkiq.core.form.bean.ObjectBuilder.getNextFormJSONFieldId;
import static com.formkiq.core.form.dto.FormJSONFieldType.SELECTBOX;
import static com.formkiq.core.form.dto.FormJSONFieldType.SIGNATURE;
import static com.formkiq.core.form.dto.FormJSONFieldType.SWITCH;
import static com.formkiq.core.form.dto.FormJSONFieldType.TEXTBOX;
import static com.formkiq.core.service.generator.pdfbox.PDRectangleUtil.calculate;
import static com.formkiq.core.service.generator.pdfbox.PDRectangleUtil.getDistanceBetween;
import static com.formkiq.core.service.generator.pdfbox.PDRectangleUtil.isIntersection;
import static com.formkiq.core.util.CollectionUtil.addIfNotNull;
import static com.formkiq.core.util.Strings.extractLabelAndValue;
import static java.util.Collections.emptyList;
import static org.springframework.util.StringUtils.isEmpty;

import java.awt.Rectangle;
import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.security.KeyPair;
import java.security.PrivateKey;
import java.security.cert.Certificate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSigProperties;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSignDesigner;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox;
import org.apache.pdfbox.pdmodel.interactive.form.PDComboBox;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDNonTerminalField;
import org.apache.pdfbox.pdmodel.interactive.form.PDPushButton;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
import org.bouncycastle.cert.X509CertificateHolder;
import org.bouncycastle.cms.CMSException;
import org.bouncycastle.cms.CMSProcessableByteArray;
import org.bouncycastle.cms.CMSSignedData;
import org.bouncycastle.cms.CMSSignedDataGenerator;
import org.bouncycastle.cms.jcajce.JcaSignerInfoGeneratorBuilder;
import org.bouncycastle.operator.ContentSigner;
import org.bouncycastle.operator.OperatorCreationException;
import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder;
import org.bouncycastle.operator.jcajce.JcaDigestCalculatorProviderBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.CollectionUtils;

import com.formkiq.core.form.JSONService;
import com.formkiq.core.form.bean.ObjectBuilder;
import com.formkiq.core.form.dto.ArchiveDTO;
import com.formkiq.core.form.dto.FormJSON;
import com.formkiq.core.form.dto.FormJSONField;
import com.formkiq.core.form.dto.FormJSONRequiredType;
import com.formkiq.core.form.dto.FormJSONSection;
import com.formkiq.core.form.dto.WorkflowOutput;
import com.formkiq.core.form.dto.WorkflowOutputDocument;
import com.formkiq.core.form.dto.WorkflowOutputFormField;
import com.formkiq.core.form.dto.WorkflowOutputPdfForm;
import com.formkiq.core.service.FormBuiltInObjectBuilder;
import com.formkiq.core.service.generator.WorkflowOutputGenerator;
import com.formkiq.core.service.generator.pdfbox.PDFieldSearchRectangle.PDFieldAreaSearch;
import com.formkiq.core.service.propertystore.PropertyStore;
import com.formkiq.core.util.CollectionUtil;
import com.formkiq.core.util.Strings;

/**
 * Implementation of {@link WorkflowOutputGenerator} using
 * https://pdfbox.apache.org.
 *
 */
public class PdfEditorServiceImpl implements WorkflowOutputGenerator, SignatureInterface {

    /** Class-wide {@link Logger}, named after this class. */
    private static final Logger LOG = Logger.getLogger(PdfEditorServiceImpl.class.getName());

    /**
     * {@link PdfTextFieldMatchComparator} used to order candidate
     * text matches whenever a match list is sorted in this class.
     */
    private PdfTextFieldMatchComparator matchComparator = new PdfTextFieldMatchComparator();

    /**
     * {@link TextSearchAreaFilter} sorted in priority:
     * {@link TextSearchAreaFilterInsideLines} first, then
     * {@link TextSearchAreaFilterDefault}.
     */
    private List<TextSearchAreaFilter> textSearchFilters = Arrays.asList(new TextSearchAreaFilterInsideLines(),
            new TextSearchAreaFilterDefault());

    /** {@link JSONService}, injected by Spring. */
    @Autowired
    private JSONService jsonService;

    /** {@link PropertyStore}, injected by Spring. */
    @Autowired
    private PropertyStore propertyStore;

    /**
     * Registers a {@link PDField} under the page number it belongs to.
     * A {@link PDNonTerminalField} is never registered itself; its children
     * are visited recursively instead. Terminal fields without any widget
     * are skipped (and logged at FINE level).
     *
     * @param objMap {@link Map} of {@link COSDictionary} objects to
     * Page Number.
     * @param field {@link PDField} to register
     * @param map {@link Map} of Page Number to the {@link PDField}s
     * found on that page; updated in place
     * @throws IOException IOException
     */
    private void addFieldToPageMap(final Map<COSDictionary, Integer> objMap, final PDField field,
            final Map<Integer, List<PDField>> map) throws IOException {

        List<PDAnnotationWidget> fieldWidgets = field.getWidgets();

        // containers: descend into the children and stop
        if (field instanceof PDNonTerminalField) {
            for (PDField child : ((PDNonTerminalField) field).getChildren()) {
                addFieldToPageMap(objMap, child, map);
            }
            return;
        }

        String description = "addFieldToPageMap='" + field.getFullyQualifiedName() + "',class="
                + field.getClass().getName();

        // a field without widgets has nothing to place on a page
        if (CollectionUtils.isEmpty(fieldWidgets)) {
            LOG.log(Level.FINE, "skip " + description);
            return;
        }

        LOG.log(Level.FINE, description);

        Integer pageNumber = getPageNumber(objMap, field);
        map.computeIfAbsent(pageNumber, key -> new ArrayList<>()).add(field);
    }

    /**
     * Fills the archived PDF named by the {@link WorkflowOutput} in up to
     * two passes: a full save that writes the field values and, only when
     * that pass reports signature fields, a second pass over the freshly
     * stored PDF that applies the signatures.
     */
    @Override
    public void addOutputDocument(final ArchiveDTO archive, final WorkflowOutput wo) throws IOException {

        WorkflowOutputPdfForm pdfOutput = (WorkflowOutputPdfForm) wo;
        String archiveKey = pdfOutput.getName() + ".pdf";

        boolean signaturesFound = dofullPDFSave(archive.getPdf(archiveKey), archive, pdfOutput);

        if (!signaturesFound) {
            return;
        }

        // re-read the PDF: the full save above replaced the archived bytes
        doSignaturePdfSave(archive.getPdf(archiveKey), archive, pdfOutput);
    }

    /**
     * Do Partial PDF Save. This save works when updating Signature fields.
     *
     * <p>For every output field that resolves to a form field and maps to a
     * {@link PDSignatureField}, the signature asset stored on the form is
     * applied via {@link #setValue}. If at least one signature was applied
     * the document is saved incrementally and written back to the archive
     * under {@code <output name>.pdf}. A PDF without an AcroForm is left
     * untouched.</p>
     *
     * <p>An {@link IllegalStateException} raised while signing a single
     * field is logged and that field is skipped.</p>
     *
     * @param docBytes byte[]
     * @param archive {@link ArchiveDTO}
     * @param output {@link WorkflowOutputPdfForm}
     * @throws IOException IOException
     */
    private void doSignaturePdfSave(final byte[] docBytes, final ArchiveDTO archive,
            final WorkflowOutputPdfForm output) throws IOException {

        boolean signed = false;
        List<SignatureOptions> signatureOptions = new ArrayList<>();
        PDDocument doc = loadPDF(docBytes);

        try {

            PDAcroForm pdform = doc.getDocumentCatalog().getAcroForm();

            // getAcroForm() is null for PDFs without a form: nothing to sign
            if (pdform != null) {

                for (WorkflowOutputFormField ofield : output.getFields()) {

                    Optional<FormJSON> form = findForm(archive, ofield);
                    Optional<FormJSONField> field = findFormField(form, ofield);

                    if (!form.isPresent() || !field.isPresent()) {
                        continue;
                    }

                    PDField pfield = pdform.getField(ofield.getDocumentfieldname());

                    // instanceof is false for null, so this also covers a missing field
                    if (!(pfield instanceof PDSignatureField)) {
                        continue;
                    }

                    // the form field's value is the key of the signature asset
                    byte[] bs = form.get().getAssetData().get(field.get().getValue());
                    if (bs == null) {
                        continue;
                    }

                    try {

                        InputStream is = new ByteArrayInputStream(bs);
                        signatureOptions.add(setValue(doc, (PDSignatureField) pfield, is));
                        signed = true;

                    } catch (IllegalStateException e) {
                        LOG.log(Level.WARNING, "unable to set signature", e);
                    }
                }
            }

            if (signed) {

                ByteArrayOutputStream bs = new ByteArrayOutputStream();
                doc.saveIncremental(bs);

                String pdfname = output.getName();
                archive.addPDF(pdfname + ".pdf", bs.toByteArray());
            }

        } finally {

            // release the signature options even when saving failed, and
            // only after the (attempted) save; the document is closed last
            try {
                for (SignatureOptions sigOption : signatureOptions) {
                    IOUtils.closeQuietly(sigOption);
                }
            } finally {
                doc.close();
            }
        }
    }

    /**
     * Do Full PDF Save. This save works when updating the values of fields.
     *
     * <p>Every output field that resolves to a form field and to a
     * non-signature {@link PDField} gets its value set from the form
     * field: each entry of the multi-value list when that list has
     * entries, otherwise the single value when it is not empty. Only the
     * value part of a {@code label[value]} string is written. The document
     * is always saved and stored in the archive under
     * {@code <output name>.pdf}, even when nothing was changed. A PDF
     * without an AcroForm is saved unchanged.</p>
     *
     * @param docBytes byte[]
     * @param archive {@link ArchiveDTO}
     * @param output {@link WorkflowOutputPdfForm}
     * @return boolean - whether signature fields are found
     * @throws IOException IOException
     */
    private boolean dofullPDFSave(final byte[] docBytes, final ArchiveDTO archive,
            final WorkflowOutputPdfForm output) throws IOException {

        boolean hasSignatures = false;

        try (PDDocument doc = loadPDF(docBytes)) {

            PDAcroForm pdform = doc.getDocumentCatalog().getAcroForm();

            // getAcroForm() is null for PDFs without a form: no fields to fill
            if (pdform != null) {

                for (WorkflowOutputFormField ofield : output.getFields()) {

                    Optional<FormJSON> form = findForm(archive, ofield);
                    Optional<FormJSONField> field = findFormField(form, ofield);

                    if (!form.isPresent() || !field.isPresent()) {
                        continue;
                    }

                    PDField pdfield = pdform.getField(ofield.getDocumentfieldname());

                    if (pdfield == null) {
                        continue;
                    }

                    if (pdfield instanceof PDSignatureField) {
                        // signatures are applied later by the incremental save
                        hasSignatures = true;
                        continue;
                    }

                    String value = field.get().getValue();
                    List<String> values = field.get().getValues();

                    // CollectionUtils.isEmpty treats an empty list as empty;
                    // StringUtils.isEmpty(Object) does not, which used to hide
                    // the single value behind an empty list
                    if (!CollectionUtils.isEmpty(values)) {
                        for (String val : values) {
                            pdfield.setValue(extractLabelAndValue(val).getRight());
                        }
                    } else if (!isEmpty(value)) {
                        pdfield.setValue(extractLabelAndValue(value).getRight());
                    }
                }
            }

            ByteArrayOutputStream bs = new ByteArrayOutputStream();
            doc.save(bs);

            String pdfname = output.getName();
            archive.addPDF(pdfname + ".pdf", bs.toByteArray());
        }

        return hasSignatures;
    }

    /**
     * Looks up the {@link FormJSONField} referenced by a
     * {@link WorkflowOutputFormField}. The field id is taken from the value
     * part of the output field's {@code label[value]} string; a
     * non-numeric value is looked up as id -1.
     *
     * @param form {@link FormJSON}, possibly absent
     * @param ofield {@link WorkflowOutputFormField}
     * @return {@link FormJSONField}, or empty when no form is present
     */
    private Optional<FormJSONField> findFormField(final Optional<FormJSON> form,
            final WorkflowOutputFormField ofield) {

        // parsed up-front, before the form presence check
        String rawId = extractLabelAndValue(ofield.getField()).getRight();
        int fieldId = NumberUtils.toInt(rawId, -1);

        if (!form.isPresent()) {
            return Optional.empty();
        }

        return findField(form.get(), fieldId);
    }

    /**
     * Looks up the {@link FormJSON} referenced by a
     * {@link WorkflowOutputFormField} in the archive, using the value part
     * of the output field's {@code label[value]} form reference as key.
     *
     * @param archive {@link ArchiveDTO}
     * @param ofield {@link WorkflowOutputFormField}
     * @return {@link FormJSON}, or empty when the archive has no such form
     */
    private Optional<FormJSON> findForm(final ArchiveDTO archive, final WorkflowOutputFormField ofield) {

        String formKey = extractLabelAndValue(ofield.getForm()).getRight();
        return Optional.ofNullable(archive.getForm(formKey));
    }

    /**
     * Sets value of {@link PDSignatureField}: attaches a new, visible
     * {@link PDSignature} to the field and registers it on the document
     * with this class as the {@link SignatureInterface}.
     *
     * <p>The returned {@link SignatureOptions} is not closed here; the
     * caller is responsible for closing it.</p>
     *
     * @param doc {@link PDDocument}
     * @param field {@link PDSignatureField}
     * @param signatureInputStream {@link InputStream} handed to
     * {@link PDVisibleSignDesigner} (presumably the signature image - confirm
     * against callers)
     * @return {@link SignatureOptions}
     * @throws IOException IOException
     * @throws IllegalStateException if the document's MDP permission is 1
     * or the field already holds a signature
     */
    private SignatureOptions setValue(final PDDocument doc, final PDSignatureField field,
            final InputStream signatureInputStream) throws IOException {

        // MDP permission 1: the document does not allow any change
        int accessPermissions = SigUtils.getMDPPermission(doc);
        if (accessPermissions == 1) {
            throw new IllegalStateException("No changes to the document are "
                    + "permitted due to DocMDP transform parameters " + "dictionary");
        }

        // retrieve signature dictionary
        PDSignature signature = field.getSignature();

        if (signature == null) {
            signature = new PDSignature();
            // after solving PDFBOX-3524 - signatureField.setValue(signature)
            // until then:
            field.getCOSObject().setItem(COSName.V, signature);
        } else {
            throw new IllegalStateException(
                    "The signature field " + field.getFullyQualifiedName() + " is already signed.");
        }

        // Optional: certify
        // can be done only if version is at least 1.5 and if not already set
        // doing this on a PDF/A-1b file fails validation by Adobe
        // preflight (PDFBOX-3821)
        // PDF/A-1b requires PDF version 1.4 max, so don't increase the version
        // on such files.
        // NOTE(review): the meaning of permission level 2 is defined by
        // SigUtils / the PDF spec, not visible here - confirm.
        final float version = 1.5f;
        if (doc.getVersion() >= version && accessPermissions == 0) {
            SigUtils.setMDPPermission(doc, signature, 2);
        }

        PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();
        if (acroForm != null && acroForm.getNeedAppearances()) {
            // PDFBOX-3738 NeedAppearances true results in visible signature
            // becoming invisible
            // with Adobe Reader
            if (acroForm.getFields().isEmpty()) {
                // we can safely delete it if there are no fields
                acroForm.getCOSObject().removeItem(COSName.NEED_APPEARANCES);
                // note that if you've set MDP permissions, the removal of this
                // item
                // may result in Adobe Reader claiming that the document has
                // been changed.
                // and/or that field content won't be displayed properly.
                // ==> decide what you prefer and adjust your code accordingly.
            }
            // when the form does have fields, NeedAppearances is left as is
        }

        // default filter
        signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE);

        // subfilter for basic and PAdES Part 2 signatures
        signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED);

        // visible appearance of the signature, built from the input stream
        PDVisibleSignDesigner visibleSignDesigner = new PDVisibleSignDesigner(signatureInputStream);

        PDVisibleSigProperties visibleSigProps = new PDVisibleSigProperties();
        visibleSigProps
                //        .signerName(name)  // TODO add..
                //        .signerLocation(location) // TODO add.
                //        .signatureReason(reason)
                //        .preferredSize(preferredSize)
                //        .page(0) // TODO fix
                .visualSignEnabled(true).setPdVisibleSignature(visibleSignDesigner);

        visibleSigProps.buildSignature();

        // name / location / reason are never set on visibleSigProps above
        // (see the TODOs), so these copy whatever its defaults are
        signature.setName(visibleSigProps.getSignerName());
        signature.setLocation(visibleSigProps.getSignerLocation());
        signature.setReason(visibleSigProps.getSignatureReason());

        // the signing date, needed for valid signature
        signature.setSignDate(Calendar.getInstance());

        SignatureOptions sigOptions = new SignatureOptions();
        sigOptions.setVisualSignature(visibleSigProps.getVisibleSignature());
        // NOTE(review): the -1 suggests getPage() is 1-based while
        // setPage() is 0-based - confirm against the PDFBox documentation
        sigOptions.setPage(visibleSigProps.getPage() - 1);
        doc.addSignature(signature, this, sigOptions);

        return sigOptions;
    }

    /**
     * Build {@link FormJSON} from {@link PDDocument}.
     *
     * <p>The form title is the document's title when it has one. Otherwise
     * it is guessed by joining, in list order, every text whose font size
     * (truncated to an int) equals the largest font size found. When
     * neither yields a title - including when there are no texts at all -
     * the form is named "Untitled".</p>
     *
     * @param doc {@link PDDocument}
     * @param texts {@link List} of {@link PdfTextField}
     * @return {@link FormJSON}
     */
    private FormJSON buildFormJSON(final PDDocument doc, final List<PdfTextField> texts) {

        String title = doc.getDocumentInformation().getTitle();

        if (isEmpty(title)) {

            // empty Optional when there are no texts; calling get() on it
            // unconditionally used to throw NoSuchElementException
            Optional<Float> maxFont = texts.stream().map(s -> Float.valueOf(s.getFontSize()))
                    .max(Float::compare);

            if (maxFont.isPresent()) {

                int largest = (int) maxFont.get().floatValue();
                StringBuilder sb = new StringBuilder();

                for (PdfTextField text : texts) {
                    if ((int) text.getFontSize() == largest) {
                        sb.append(text.getText()).append(' ');
                    }
                }

                title = sb.toString();
            }
        }

        title = isEmpty(title) ? "Untitled" : title.trim();

        return ObjectBuilder.buildFormJSON(title);
    }

    /**
     * Build {@link FormJSONSection}s for one page: matches the page's
     * texts to its fields, removes the matched texts from {@code texts},
     * looks for section headers among the remaining texts and creates the
     * form fields.
     *
     * @param form {@link FormJSON}
     * @param page {@link PDPage}
     * @param fields {@link List} of {@link PDField}
     * @param texts {@link List} of {PdfTextField}; matched entries are
     * removed from this list
     * @param lineRects {@link List} of {@link PDRectangle}
     * @return {@link Map} of {@link PDField} to the {@link FormJSONField}
     * created for it
     */
    private Map<PDField, FormJSONField> buildFormSection(final FormJSON form, final PDPage page,
            final List<PDField> fields, final List<PdfTextField> texts, final List<PDRectangle> lineRects) {

        // computed first, while the text list is still complete
        final Map<UUID, List<PdfTextField>> relatedTexts = getRelatedTextMap(texts);

        final List<PdfFieldMatch> fieldMatches = generateMatches(page, fields, texts, lineRects);
        matchTextToPdField(fieldMatches);

        // from here on only the unmatched texts remain
        removeMatchedTexts(fieldMatches, texts);

        final int headerFontSize = getAverageFontSize(texts) + 1;
        final List<PdfTextField> sectionHeaders = findSectionHeaders(texts, headerFontSize);

        return createFormFields(form, fieldMatches, sectionHeaders, relatedTexts, headerFontSize);
    }

    /**
     * Removes from {@code texts} every text that appears as a match,
     * either directly on a {@link PdfFieldMatch} or on one of its widgets.
     *
     * @param matches  {@link List} of {@link PdfFieldMatch}
     * @param texts {@link List} of {@link PdfTextField}; modified in place
     */
    private void removeMatchedTexts(final List<PdfFieldMatch> matches, final List<PdfTextField> texts) {

        for (PdfFieldMatch fieldMatch : matches) {

            // field level matches first ...
            fieldMatch.getMatches().forEach(textMatch -> texts.remove(textMatch.getTextField()));

            // ... then the matches of each widget
            fieldMatch.getWidgets().forEach(widgetMatch -> widgetMatch.getMatches()
                    .forEach(textMatch -> texts.remove(textMatch.getTextField())));
        }
    }

    /**
     * Create {@link List} of {@link WorkflowOutputFormField}, one per
     * {@link PDField} and in the same order, linking the PDF field's fully
     * qualified name to the {@link FormJSONField} mapped to it.
     *
     * @param form {@link FormJSON}
     * @param fields {@link List} of {@link PDField}
     * @param map {@link Map} of {@link PDField} to {@link FormJSONField}
     * @return {@link List} of {@link WorkflowOutputFormField}
     */
    private List<WorkflowOutputFormField> createFieldOutputs(final FormJSON form, final List<PDField> fields,
            final Map<PDField, FormJSONField> map) {

        List<WorkflowOutputFormField> outputs = new ArrayList<>(fields.size());

        for (PDField pdfield : fields) {
            outputs.add(buildWorkflowOuputFormField(form, map.get(pdfield), pdfield.getFullyQualifiedName()));
        }

        return outputs;
    }

    /**
     * Create {@link FormJSONField} from {@link PDField}. The new field is
     * optional, takes its label from the matched text and its type from
     * the PDF field class: {@link PDComboBox} becomes a select box,
     * {@link PDCheckBox} a switch, {@link PDSignatureField} a signature
     * and anything else a text box.
     *
     * @param pdffield {@link PdfFieldMatch}
     * @param relatedTextMap {@link Map} of {@link PdfTextField}
     * @return {@link FormJSONField}
     */
    private FormJSONField createFormField(final PdfFieldMatch pdffield,
            final Map<UUID, List<PdfTextField>> relatedTextMap) {

        PDField pdfield = pdffield.getField();

        FormJSONField field = new FormJSONField();
        field.setRequired(FormJSONRequiredType.OPTIONAL);
        field.setLabel(getLabel(pdffield, relatedTextMap));

        if (pdfield instanceof PDComboBox) {

            field.setType(SELECTBOX);
            field.setOptions(((PDComboBox) pdfield).getOptions());

        } else if (pdfield instanceof PDCheckBox) {

            PDCheckBox checkBox = (PDCheckBox) pdfield;

            // default options use the "on" value as both label and value
            field.setOptions(checkBox.getOnValues().stream().map(on -> on + "[" + on + "]")
                    .collect(Collectors.toList()));

            List<PdAnnotationWidgetMatch> widgetMatches = pdffield.getWidgets();

            // one widget per "on" value: use the widget's best text as label
            if (widgetMatches.size() == checkBox.getOnValues().size()) {

                List<String> options = field.getOptions();

                for (int i = 0; i < options.size(); i++) {

                    List<PdfTextFieldMatch> widgetTexts = widgetMatches.get(i).getMatches();
                    if (widgetTexts.isEmpty()) {
                        continue;
                    }

                    String optionValue = extractLabelAndValue(options.get(i)).getRight();
                    String optionLabel = widgetTexts.get(0).getTextField().getText().trim();
                    options.set(i, optionLabel + "[" + optionValue + "]");
                }
            }

            field.setType(SWITCH);

        } else if (pdfield instanceof PDSignatureField) {

            field.setType(SIGNATURE);

        } else {

            field.setType(TEXTBOX);
        }

        return field;
    }

    /**
     * Transfers {@link PDField} to {@link FormJSONField}. Fields are added
     * to the form in list order; a new {@link FormJSONSection} is opened
     * for the first field and whenever the closest header differs from
     * the header of the current section. Field ids continue from the
     * form's next free id.
     *
     * @param form {@link FormJSON}; sections are appended to it
     * @param pdfields {@link List} of {@link PdfFieldMatch}
     * @param headers {@link List} of {@link PdfTextField}
     * @param relatedTextMap {@link Map} of {@link PdfTextField}
     * @param avgFont int
     * @return {@link Map} of {@link PDField} to the {@link FormJSONField}
     * created for it
     */
    private Map<PDField, FormJSONField> createFormFields(final FormJSON form, final List<PdfFieldMatch> pdfields,
            final List<PdfTextField> headers, final Map<UUID, List<PdfTextField>> relatedTextMap,
            final int avgFont) {

        int nextId = getNextFormJSONFieldId(form);

        boolean sectionStarted = false;
        PdfTextField currentHeader = null;
        FormJSONSection section = buildFormJSONSection(null, null);
        Map<PDField, FormJSONField> created = new HashMap<>();

        for (PdfFieldMatch fieldMatch : pdfields) {

            PdfTextField header = findClosestHeader(headers, fieldMatch, avgFont);

            // NOTE(review): while currentHeader is null (first field had no
            // header) a later header never opens a new section - confirm
            // that this is intended.
            boolean headerChanged = currentHeader != null && !currentHeader.equals(header);

            if (!sectionStarted || headerChanged) {

                String title = header != null ? header.getText().replaceAll("\t", " ") : null;

                section = buildFormJSONSection(title, new ArrayList<>());
                form.getSections().add(section);
                currentHeader = header;
                sectionStarted = true;
            }

            FormJSONField field = createFormField(fieldMatch, relatedTextMap);
            field.setId(nextId);
            nextId++;

            section.getFields().add(field);

            PDField source = fieldMatch.getField();
            if (source != null) {
                created.put(source, field);
            }
        }

        return created;
    }

    /**
     * Find Header for {@link PdfFieldMatch}. Walks the headers in list
     * order and keeps the last eligible header whose lower-left Y is
     * greater than that of the field's first widget; the walk stops at the
     * first eligible header for which this is not the case. A header is
     * eligible when {@code fontSize} is -1 or its font size is at least
     * {@code fontSize}.
     *
     * @param headers {@link List} of {@link PdfTextField}
     * @param pdfield {@link PdfFieldMatch}
     * @param fontSize float
     * @return {@link PdfTextField} or {@code null} when none qualifies
     */
    private PdfTextField findClosestHeader(final List<PdfTextField> headers, final PdfFieldMatch pdfield,
            final float fontSize) {

        PDRectangle fieldRect = pdfield.getWidgets().get(0).getWidget().getRectangle();
        PdfTextField closest = null;

        for (PdfTextField candidate : headers) {

            boolean eligible = fontSize == -1 || candidate.getFontSize() >= fontSize;
            if (!eligible) {
                continue;
            }

            if (!(candidate.getRectangle().getLowerLeftY() > fieldRect.getLowerLeftY())) {
                break;
            }

            closest = candidate;
        }

        return closest;
    }

    /**
     * Find Possible Matches for {@link PdfTextField} and sorted by best match.
     * A text is a candidate for every search area its rectangle
     * intersects. Candidates found through a
     * {@link PDFieldAreaSearch#RECTANGLE} area take precedence: when there
     * is at least one, only those are returned.
     *
     * @param rect {@link PDRectangle} the distance of each text is measured to
     * @param fields {@link List} of candidate {@link PdfTextField}
     * @param searchAreas {@link List} of {@link PDFieldSearchRectangle}
     * @return {@link List} sorted with the class's match comparator
     */
    private List<PdfTextFieldMatch> findPossibleMatches(final PDRectangle rect, final List<PdfTextField> fields,
            final List<PDFieldSearchRectangle> searchAreas) {

        List<PdfTextFieldMatch> rectangleMatches = new ArrayList<>();
        List<PdfTextFieldMatch> otherMatches = new ArrayList<>();

        for (PdfTextField text : fields) {

            float distance = getDistanceBetween(rect, text.getRectangle());

            for (PDFieldSearchRectangle area : searchAreas) {

                if (!isIntersection(text.getRectangle(), area.getRectangle())) {
                    continue;
                }

                List<PdfTextFieldMatch> target = PDFieldAreaSearch.RECTANGLE.equals(area.getType())
                        ? rectangleMatches : otherMatches;

                addIfNotNull(target, new PdfTextFieldMatch(distance, text, area.getType()));
            }
        }

        List<PdfTextFieldMatch> result = rectangleMatches.isEmpty() ? otherMatches : rectangleMatches;
        Collections.sort(result, this.matchComparator);
        return result;
    }

    /**
     * Find Section Headers: texts with a font size greater than
     * {@code avgFont}, containing at least one letter and longer than one
     * character. Nothing is searched when {@code avgFont} is not positive.
     *
     * @param texts {@link List} of {@link PdfTextField}
     * @param avgFont int
     * @return {@link List} of header candidates, in the order of {@code texts}
     */
    private List<PdfTextField> findSectionHeaders(final List<PdfTextField> texts, final int avgFont) {

        List<PdfTextField> found = new ArrayList<>();

        if (avgFont <= 0) {
            return found;
        }

        LOG.log(Level.FINE, "-> header section avg font size " + avgFont);

        for (PdfTextField candidate : texts) {

            String content = candidate.getText();

            boolean isHeader = candidate.getFontSize() > avgFont && Strings.hasAtLeast1Letter(content)
                    && content.length() > 1;

            if (isHeader) {
                LOG.log(Level.FINE, "-> found possible header section " + candidate.getText());
                found.add(candidate);
            }
        }

        return found;
    }

    /**
     * Find Text for {@link PDAnnotationWidget}, using the widget's
     * rectangle as the reference for the possible matches.
     *
     * @param widget {@link PDAnnotationWidget}
     * @param fields {@link List} of candidate {@link PdfTextField}
     * @param searchAreas {@link List} of {@link PDFieldSearchRectangle}
     * @return {@link List} of {@link PdfTextFieldMatch}
     */
    private List<PdfTextFieldMatch> findTextForWidget(final PDAnnotationWidget widget,
            final List<PdfTextField> fields, final List<PDFieldSearchRectangle> searchAreas) {

        return findPossibleMatches(widget.getRectangle(), fields, searchAreas);
    }

    /**
     * Generate Matches between {@link PDField} and PDF Text. Produces one
     * {@link PdfFieldMatch} per field, holding one
     * {@link PdAnnotationWidgetMatch} per widget with the texts found in
     * that widget's search areas; checkbox matches are adjusted
     * afterwards.
     *
     * @param page {@link PDPage}
     * @param pdFields {@link List} of {@link PDField}
     * @param textOnPage {@link List} of {@link PdfTextField}
     * @param lineRects {@link List} of {@link PDRectangle}
     * @return {@link List} of {@link PdfFieldMatch}, in the order of
     * {@code pdFields}
     */
    private List<PdfFieldMatch> generateMatches(final PDPage page, final List<PDField> pdFields,
            final List<PdfTextField> textOnPage, final List<PDRectangle> lineRects) {

        List<PdfFieldMatch> result = new ArrayList<>();

        for (PDField pdField : pdFields) {

            PdfFieldMatch fieldMatch = new PdfFieldMatch();
            fieldMatch.setField(pdField);
            fieldMatch.setWidgets(new ArrayList<>());
            fieldMatch.setMatches(new ArrayList<>());
            result.add(fieldMatch);

            for (PDAnnotationWidget widget : pdField.getWidgets()) {

                PdAnnotationWidgetMatch widgetMatch = new PdAnnotationWidgetMatch();
                widgetMatch.setWidget(widget);
                fieldMatch.getWidgets().add(widgetMatch);

                List<PDFieldSearchRectangle> areas = getTextSearchArea(page, pdField, widget, lineRects);
                widgetMatch.setMatches(findTextForWidget(widget, textOnPage, areas));
            }

            updatePDCheckBoxMatches(pdField, fieldMatch);
        }

        return result;
    }

    /**
     * Adjust Matches for {@link PDCheckBox} fields.
     *
     * <p>Applies only to checkboxes with more than one "on" value and
     * exactly one widget per "on" value. For each widget, the first of its
     * two best-ranked texts that starts or ends with the widget's "on"
     * value (compared upper-cased) is picked. Only when such a text is
     * found for every widget are the picked texts promoted to match value
     * 0 and every widget's match list re-sorted.</p>
     *
     * @param pdField {@link PDField}
     * @param match {@link PdfFieldMatch}
     */
    private void updatePDCheckBoxMatches(final PDField pdField, final PdfFieldMatch match) {

        if (!(pdField instanceof PDCheckBox)) {
            return;
        }

        PDCheckBox c = (PDCheckBox) pdField;

        List<String> onValues = c.getOnValues().stream().map(s -> s.trim().toUpperCase())
                .collect(Collectors.toList());

        if (pdField.getWidgets().size() != onValues.size() || onValues.size() <= 1) {
            return;
        }

        List<PdfTextFieldMatch> bestmatch = new ArrayList<>();

        for (int i = 0; i < match.getWidgets().size(); i++) {

            List<PdfTextFieldMatch> candidates = match.getWidgets().get(i).getMatches();
            String onvalue = onValues.get(i);

            // Look at the 1st and then the 2nd match only. Bounding the loop
            // by the list size avoids the IndexOutOfBoundsException the
            // former "a && (b) || c" conditions caused on short lists.
            int limit = Math.min(candidates.size(), 2);

            for (int j = 0; j < limit; j++) {

                PdfTextFieldMatch candidate = candidates.get(j);
                String text = candidate.getTextField().getText().toUpperCase();

                if (text.startsWith(onvalue) || text.endsWith(onvalue)) {
                    bestmatch.add(candidate);
                    break;
                }
            }
        }

        // all or nothing: only re-rank when every widget found its text
        if (bestmatch.size() == onValues.size()) {

            for (PdfTextFieldMatch bm : bestmatch) {
                bm.setMatch(0);
            }

            for (PdAnnotationWidgetMatch m : match.getWidgets()) {
                Collections.sort(m.getMatches(), this.matchComparator);
            }
        }
    }

    /**
     * Computes the average of the rounded font sizes of the given texts.
     * @param texts {@link List} of {@link PdfTextField}
     * @return sum of the rounded font sizes divided (integer division) by
     *         the number of texts, or -1 when the list is empty
     */
    private int getAverageFontSize(final List<PdfTextField> texts) {

        if (texts.isEmpty()) {
            return -1;
        }

        int total = 0;

        for (PdfTextField text : texts) {
            total += Math.round(text.getFontSize());
        }

        return total / texts.size();
    }

    /**
     * Builds a lookup from each annotation's {@link COSDictionary} to the
     * zero-based index of the page that lists the annotation.
     * @param doc {@link PDDocument}
     * @return {@link Map} of {@link COSDictionary} to {@link Integer}
     * @throws IOException IOException
     */
    private Map<COSDictionary, Integer> getCOSDictionaryToPageNumberMap(final PDDocument doc) throws IOException {

        Map<COSDictionary, Integer> pageByAnnotation = new HashMap<>();

        PDPageTree tree = doc.getPages();

        int pageIndex = 0;
        while (pageIndex < tree.getCount()) {

            Integer boxedIndex = Integer.valueOf(pageIndex);

            for (PDAnnotation a : tree.get(pageIndex).getAnnotations()) {
                pageByAnnotation.put(a.getCOSObject(), boxedIndex);
            }

            pageIndex++;
        }

        return pageByAnnotation;
    }

    /**
     * Determines the Label for a matched {@link PDField}.
     *
     * <p>The label is the trimmed text of the first match; when that text
     * has an entry in the related text map, the trimmed texts of all related
     * entries joined with a single space are used instead. When no label
     * results, the field's fully qualified name is returned.
     *
     * @param pdffield {@link PdfFieldMatch}
     * @param relatedTextMap {@link Map} of {@link PdfTextField}
     * @return {@link String}
     */
    private String getLabel(final PdfFieldMatch pdffield, final Map<UUID, List<PdfTextField>> relatedTextMap) {

        String label = null;

        List<PdfTextFieldMatch> candidates = pdffield.getMatches();

        if (!candidates.isEmpty()) {

            PdfTextField best = candidates.get(0).getTextField();
            label = best.getText().trim();

            UUID key = best.getUUID();

            if (relatedTextMap.containsKey(key)) {
                label = relatedTextMap.get(key).stream().map(f -> f.getText().trim())
                        .collect(Collectors.joining(" ")).trim();
            }
        }

        return isEmpty(label) ? pdffield.getField().getFullyQualifiedName() : label;
    }

    @Override
    public WorkflowOutputDocument getNewWorkflowOutputDocument() {
        // this service always hands out the PDF flavour of the output document
        WorkflowOutputDocument document = new WorkflowOutputPdfForm();
        return document;
    }

    /**
     * Loads a PDF from bytes and derives a {@link FormJSON} plus the
     * {@link WorkflowOutputFormField} list from the fields found on each
     * page.
     *
     * @param filename not used by this implementation
     * @param data PDF content
     * @return {@link Pair} of the built {@link FormJSON} and the output
     *         fields collected over all pages
     * @throws IOException IOException
     */
    @Override
    public Pair<FormJSON, List<WorkflowOutputFormField>> getOutputFormFields(final String filename,
            final byte[] data) throws IOException {

        List<WorkflowOutputFormField> wofields = new ArrayList<>();

        // try-with-resources closes the document on every path and, unlike a
        // manual try/finally, keeps a processing exception as the primary
        // one when close() fails as well.
        try (PDDocument doc = loadPDF(data)) {

            Map<COSDictionary, Integer> obMap = getCOSDictionaryToPageNumberMap(doc);

            Map<Integer, List<PDField>> pdMap = getPDFields(doc, obMap);

            Map<Integer, List<PdfTextField>> textsMap = getTextMap(doc);

            PDPageTree pages = doc.getDocumentCatalog().getPages();

            // the form itself is built from the texts stored under page key 0
            FormJSON form = buildFormJSON(doc, textsMap.get(Integer.valueOf(0)));

            for (int i = 0; i < pages.getCount(); i++) {

                PDPage page = pages.get(i);
                Integer pageNum = Integer.valueOf(i);

                List<PDField> fields = pdMap.getOrDefault(pageNum, emptyList());

                List<PdfTextField> texts = getTextForPage(textsMap, pageNum);

                List<PDRectangle> lineRects = getPageLinePaths(page);

                Map<PDField, FormJSONField> fieldMap = buildFormSection(form, page, fields, texts, lineRects);

                List<WorkflowOutputFormField> outfields = createFieldOutputs(form, fields, fieldMap);

                wofields.addAll(outfields);
            }

            return Pair.of(form, wofields);
        }
    }

    /**
     * Collects the bounding rectangles of the line paths found on a page.
     *
     * <p>An {@link IOException} while processing the page is logged as a
     * warning and an empty list is returned.
     *
     * @param page {@link PDPage}
     * @return {@link List} of {@link PDRectangle}
     */
    private List<PDRectangle> getPageLinePaths(final PDPage page) {

        try {

            PdfLinePaths extractor = new PdfLinePaths(page);
            extractor.processPage();

            List<PDRectangle> bounds = new ArrayList<>();

            for (GeneralPath path : extractor.getLinePaths()) {
                Rectangle box = path.getBounds();
                bounds.add(new PDRectangle(box.x, box.y, box.width, box.height));
            }

            return bounds;

        } catch (IOException e) {
            LOG.log(Level.WARNING, "Unable to find Page Line Paths", e);
            return Collections.emptyList();
        }
    }

    /**
     * Get Page Number for {@link PDField}, resolved through the field's
     * first widget.
     * @param objMap {@link Map} of {@link COSDictionary} and Page Number
     * @param field {@link PDField}
     * @return {@link Integer}
     */
    private Integer getPageNumber(final Map<COSDictionary, Integer> objMap, final PDField field) {
        PDAnnotationWidget firstWidget = field.getWidgets().get(0);
        return getPageNumber(objMap, field, firstWidget);
    }

    /**
     * Looks up the Page Number registered for a widget's
     * {@link COSDictionary}.
     * @param objMap {@link Map} of {@link COSDictionary} and Page Number
     * @param field {@link PDField}, used only for the error message
     * @param widget {@link PDAnnotationWidget}
     * @return {@link Integer}
     */
    private Integer getPageNumber(final Map<COSDictionary, Integer> objMap, final PDField field,
            final PDAnnotationWidget widget) {

        COSDictionary key = widget.getCOSObject();

        // fail fast when the widget is not listed on any page
        if (!objMap.containsKey(key)) {
            throw new PdfConversionException("cannot find page for " + field.getFullyQualifiedName());
        }

        return objMap.get(key);
    }

    /**
     * Groups the {@link PDField} objects of the document's AcroForm by page
     * number and sorts each page's list with {@link PDFieldComparator}.
     *
     * <p>{@link PDPushButton} fields are skipped. A document without an
     * AcroForm yields an empty map.
     *
     * @param doc {@link PDDocument}
     * @param objMap {@link Map} of {@link COSDictionary} and Page Number
     * @return {@link Map} of {@link PDField} by page number
     * @throws IOException IOException
     */
    private Map<Integer, List<PDField>> getPDFields(final PDDocument doc, final Map<COSDictionary, Integer> objMap)
            throws IOException {

        PDDocumentCatalog dc = doc.getDocumentCatalog();
        PDAcroForm pdform = dc.getAcroForm();

        Map<Integer, List<PDField>> map = new HashMap<>();

        // getAcroForm() returns null when the PDF carries no form at all;
        // treat that as "no fields" instead of failing with an NPE.
        if (pdform == null) {
            return map;
        }

        for (PDField field : pdform.getFields()) {

            if (field instanceof PDPushButton) {
                LOG.log(Level.FINE, "skip addFieldToPageMap='" + field.getFullyQualifiedName() + "',class="
                        + field.getClass().getName());
                continue;
            }

            addFieldToPageMap(objMap, field, map);
        }

        for (List<PDField> pageFields : map.values()) {
            Collections.sort(pageFields, new PDFieldComparator());
        }

        return map;
    }

    /**
     * Groups the texts and indexes every member of a multi-element group.
     *
     * <p>Each text that belongs to a group of two or more is mapped, by its
     * UUID, to the whole group. Texts that end up alone get no entry.
     *
     * @param texts {@link List} of {@link PdfTextField}
     * @return {@link Map} of {@link UUID} to the {@link List} of grouped
     *         {@link PdfTextField}
     */
    private Map<UUID, List<PdfTextField>> getRelatedTextMap(final List<PdfTextField> texts) {

        Map<UUID, List<PdfTextField>> groupByUuid = new HashMap<>();

        List<List<PdfTextField>> groups = CollectionUtil.groupBy(texts, new PdfTextFieldTextXYComparator(),
                new PdfTextFieldTextGroupComparator());

        for (List<PdfTextField> group : groups) {

            if (group.size() <= 1) {
                continue;
            }

            group.forEach(member -> groupByUuid.put(member.getUUID(), group));
        }

        return groupByUuid;
    }

    /**
     * Copies the texts registered for a page (none when the page has no
     * entry) and joins horizontally similar texts.
     *
     * @param textsMap
     *            {@link Map} of Page Number and {@link List} of
     *            {@link PdfTextField}
     * @param pageNum
     *            {@link Integer}
     * @return {@link List} of {@link PdfTextField}
     */
    private List<PdfTextField> getTextForPage(final Map<Integer, List<PdfTextField>> textsMap,
            final Integer pageNum) {

        List<PdfTextField> registered = textsMap.getOrDefault(pageNum, emptyList());

        // work on a copy so the list held by the map is not restructured
        List<PdfTextField> copy = new ArrayList<>(registered);

        return joinHorizontalSimilarText(copy);
    }

    /**
     * Runs a position-sorting {@link TextToPDFieldMapper} over the document
     * and returns the text locations it collected.
     * @param doc {@link PDDocument}
     * @return {@link Map}
     * @throws IOException IOException
     */
    public Map<Integer, List<PdfTextField>> getTextMap(final PDDocument doc) throws IOException {

        TextToPDFieldMapper textMapper = new TextToPDFieldMapper();
        textMapper.setSortByPosition(true);

        // the returned text is not needed; the call populates the locations
        textMapper.getText(doc);

        return textMapper.getTextLocations();
    }

    /**
     * Collects, from every configured text search filter, the areas in
     * which to look for the Text Label of a {@link PDField}.
     *
     * @param page {@link PDPage}
     * @param pdField {@link PDField}
     * @param widget {@link PDAnnotationWidget}
     * @param lineRects {@link List} of {@link PDRectangle}
     * @return {@link List} of {@link PDFieldSearchRectangle}
     */
    private List<PDFieldSearchRectangle> getTextSearchArea(final PDPage page, final PDField pdField,
            final PDAnnotationWidget widget, final List<PDRectangle> lineRects) {

        List<PDFieldSearchRectangle> combined = new ArrayList<>();

        // results are appended in filter order
        for (TextSearchAreaFilter searchFilter : this.textSearchFilters) {
            combined.addAll(searchFilter.getTextSearchArea(page, pdField, widget, lineRects));
        }

        return combined;
    }

    @Override
    public boolean isSupported(final WorkflowOutput wo) {
        // supported exactly when the output is a PDF form (null is not)
        return WorkflowOutputPdfForm.class.isInstance(wo);
    }

    /**
     * Join similar horizontal text together.
     *
     * <p>Texts are grouped; in every group of two or more, the group is
     * sorted, the first element receives the texts joined with a single
     * space and the rectangle calculated from all members, and the remaining
     * members are removed from the result. Note that the first element is
     * modified in place.
     *
     * @param texts {@link List} of {@link PdfTextField}
     * @return {@link List} of {@link PdfTextField}
     */
    private List<PdfTextField> joinHorizontalSimilarText(final List<PdfTextField> texts) {

        List<PdfTextField> joined = new ArrayList<>(texts);

        PdfTextFieldTextXYComparator byXY = new PdfTextFieldTextXYComparator();
        List<List<PdfTextField>> groups = CollectionUtil.groupBy(joined, new PdfTextFieldTextYXComparator(),
                new PdfTextFieldHorizontalGroupComparator());

        for (List<PdfTextField> group : groups) {

            // nothing to merge for a text that stands alone
            if (group.size() <= 1) {
                continue;
            }

            Collections.sort(group, byXY);

            String mergedText = group.stream().map(p -> p.getText()).collect(Collectors.joining(" "));

            PDRectangle mergedBounds = calculate(
                    group.stream().map(p -> p.getRectangle()).collect(Collectors.toList()));

            // the first member absorbs the whole group
            PdfTextField head = group.get(0);
            head.setText(mergedText);
            head.setRectangle(mergedBounds);

            for (PdfTextField absorbed : group.subList(1, group.size())) {
                joined.remove(absorbed);
            }
        }

        return joined;
    }

    /**
     * Load PDDocument from an in-memory byte array. The caller is
     * responsible for closing the returned document.
     * @param data byte[]
     * @return {@link PDDocument}
     * @throws IOException IOException
     */
    public PDDocument loadPDF(final byte[] data) throws IOException {
        ByteArrayInputStream stream = new ByteArrayInputStream(data);
        return PDDocument.load(stream);
    }

    /**
     * Narrows the text matches of every field and widget down to the best
     * one.
     *
     * <p>Steps, in order: each field gets the distinct union of its widgets'
     * matches; matches that another widget matches better are removed from
     * the widgets; each widget keeps only its first remaining match; finally
     * each field either keeps its first match after the Checkbox specific
     * clean-up and sort (multi-widget {@link PDCheckBox}) or takes over the
     * matches of its first widget.
     *
     * @param matches {@link List}
     */
    private void matchTextToPdField(final List<PdfFieldMatch> matches) {

        // sets all widget matches to PdfFieldMatch
        for (PdfFieldMatch field : matches) {
            field.setMatches(field.getWidgets().stream().flatMap(s -> s.getMatches().stream()).distinct()
                    .collect(Collectors.toList()));
        }

        List<PdAnnotationWidgetMatch> allWidgets = new ArrayList<>();
        for (PdfFieldMatch field : matches) {
            allWidgets.addAll(field.getWidgets());
        }

        removeWithBetterMatches(allWidgets);

        // every widget keeps at most its top match
        for (PdAnnotationWidgetMatch widget : allWidgets) {

            if (widget.getMatches().isEmpty()) {
                widget.setMatches(Collections.emptyList());
            } else {
                widget.setMatches(Arrays.asList(widget.getMatches().get(0)));
            }
        }

        for (PdfFieldMatch field : matches) {

            boolean multiWidgetCheckBox = field.getField() instanceof PDCheckBox
                    && field.getWidgets().size() > 1;

            if (multiWidgetCheckBox) {

                updatePDCheckBoxPdfTextFieldMatch(field);
                Collections.sort(field.getMatches(), this.matchComparator);

                if (!field.getMatches().isEmpty()) {
                    field.setMatches(Arrays.asList(field.getMatches().get(0)));
                }

            } else if (!field.getWidgets().isEmpty()) {
                field.setMatches(field.getWidgets().get(0).getMatches());
            }
        }
    }

    /**
     * Prunes the {@link PdfTextFieldMatch} list of a {@link PdfFieldMatch}.
     *
     * <p>A field-level match is removed when any of the following holds:
     * <ul>
     * <li>its match value is negative;</li>
     * <li>its text field is already matched by one of the field's
     * {@link PdAnnotationWidgetMatch};</li>
     * <li>its text starts to the right of the rightmost widget edge (lower
     * left X greater than the largest widget upper right X).</li>
     * </ul>
     *
     * @param fm {@link PdfFieldMatch}
     */
    private void updatePDCheckBoxPdfTextFieldMatch(final PdfFieldMatch fm) {

        // right edge of the rightmost widget; empty when there are no widgets
        Optional<Float> maxX = fm.getWidgets().stream()
                .map(s -> Float.valueOf(s.getWidget().getRectangle().getUpperRightX()))
                .max(Float::compareTo);

        // text fields already claimed by any of the widgets
        Set<PdfTextField> widgetTexts = fm.getWidgets().stream().flatMap(s -> s.getMatches().stream())
                .map(s -> s.getTextField()).collect(Collectors.toSet());

        fm.getMatches().removeIf(m -> m.getMatch() < 0 || widgetTexts.contains(m.getTextField())
                || (maxX.isPresent()
                        && maxX.get().floatValue() < m.getTextField().getRectangle().getLowerLeftX()));
    }

    /**
     * Drops, from every widget, the matches whose text is matched better by
     * another widget.
     *
     * @param widgets {@link List} of {@link PdAnnotationWidgetMatch}
     */
    private void removeWithBetterMatches(final List<PdAnnotationWidgetMatch> widgets) {

        for (PdAnnotationWidgetMatch current : widgets) {

            // removal goes through the iterator so the list can shrink while being walked
            for (Iterator<PdfTextFieldMatch> it = current.getMatches().iterator(); it.hasNext();) {

                PdfTextFieldMatch candidate = it.next();

                if (hasBetterMatch(candidate, current, widgets)) {
                    it.remove();
                }
            }
        }
    }

    /**
     * Tells whether any other widget holds a match for the same text field
     * with a strictly smaller match value.
     *
     * @param textfield {@link PdfTextFieldMatch}
     * @param widget {@link PdAnnotationWidgetMatch} owning the match; it is
     *        excluded from the search
     * @param widgets {@link List} of {@link PdAnnotationWidgetMatch}
     * @return boolean
     */
    private boolean hasBetterMatch(final PdfTextFieldMatch textfield, final PdAnnotationWidgetMatch widget,
            final List<PdAnnotationWidgetMatch> widgets) {

        return widgets.stream()
                .filter(other -> !other.equals(widget))
                .anyMatch(other -> other.getMatches().stream()
                        .anyMatch(rival -> rival.getMatch() < textfield.getMatch()
                                && textfield.getTextField().equals(rival.getTextField())));
    }

    @Override
    public void postGenerateCallback(final ArchiveDTO archive, final FormJSON form) throws IOException {

        // nothing to do for forms without signature fields
        if (!hasFieldType(form, SIGNATURE)) {
            return;
        }

        FormJSON signatureForm = new FormBuiltInObjectBuilder().getSignatureForm(this.jsonService);
        archive.addStep(signatureForm);

        // move every signature field from the form into the signature form
        List<FormJSONField> signatureFields = findFieldsByType(form, SIGNATURE);
        removeFields(form, signatureFields);

        for (FormJSONField signatureField : signatureFields) {

            signatureForm.getSections().get(0).getFields().add(signatureField);

            // re-point the matching workflow output field at the signature form
            findWorkflowOutputDocument(archive.getWorkflow(), form, signatureField).ifPresent(
                    out -> out.setForm(signatureForm.getName() + "[" + signatureForm.getUUID() + "]"));
        }
    }

    @Override
    public byte[] sign(final InputStream content) throws IOException {

        try {

            // key material comes from the property store
            KeyPair keyPair = this.propertyStore.getKeyPair();
            PrivateKey signingKey = keyPair.getPrivate();
            Certificate jcaCertificate = this.propertyStore.getCertificate(keyPair);

            CMSSignedDataGenerator generator = new CMSSignedDataGenerator();

            org.bouncycastle.asn1.x509.Certificate bcCertificate = org.bouncycastle.asn1.x509.Certificate
                    .getInstance(jcaCertificate.getEncoded());

            ContentSigner contentSigner = new JcaContentSignerBuilder("SHA256WithRSA").build(signingKey);

            JcaSignerInfoGeneratorBuilder infoBuilder = new JcaSignerInfoGeneratorBuilder(
                    new JcaDigestCalculatorProviderBuilder().build());

            generator.addSignerInfoGenerator(
                    infoBuilder.build(contentSigner, new X509CertificateHolder(bcCertificate)));

            CMSProcessableByteArray payload = new CMSProcessableByteArray(IOUtils.toByteArray(content));

            // NOTE(review): "false" presumably requests a detached signature
            // (content not encapsulated) - confirm against the BouncyCastle docs
            CMSSignedData signed = generator.generate(payload, false);

            return signed.getEncoded();

        } catch (GeneralSecurityException | CMSException | OperatorCreationException e) {
            // surface every signing failure as an IOException, keeping the cause
            throw new IOException(e);
        }
    }
}