Java tutorial
/* * Copyright (C) 2017 FormKiQ Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.formkiq.core.service.generator.pdfbox; import static com.formkiq.core.form.FormFinder.findField; import static com.formkiq.core.form.FormFinder.findFieldsByType; import static com.formkiq.core.form.FormFinder.findWorkflowOutputDocument; import static com.formkiq.core.form.FormFinder.hasFieldType; import static com.formkiq.core.form.FormTransformer.removeFields; import static com.formkiq.core.form.bean.ObjectBuilder.buildFormJSONSection; import static com.formkiq.core.form.bean.ObjectBuilder.buildWorkflowOuputFormField; import static com.formkiq.core.form.bean.ObjectBuilder.getNextFormJSONFieldId; import static com.formkiq.core.form.dto.FormJSONFieldType.SELECTBOX; import static com.formkiq.core.form.dto.FormJSONFieldType.SIGNATURE; import static com.formkiq.core.form.dto.FormJSONFieldType.SWITCH; import static com.formkiq.core.form.dto.FormJSONFieldType.TEXTBOX; import static com.formkiq.core.service.generator.pdfbox.PDRectangleUtil.calculate; import static com.formkiq.core.service.generator.pdfbox.PDRectangleUtil.getDistanceBetween; import static com.formkiq.core.service.generator.pdfbox.PDRectangleUtil.isIntersection; import static com.formkiq.core.util.CollectionUtil.addIfNotNull; import static com.formkiq.core.util.Strings.extractLabelAndValue; import static java.util.Collections.emptyList; import static org.springframework.util.StringUtils.isEmpty; import java.awt.Rectangle; 
import java.awt.geom.GeneralPath; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.security.GeneralSecurityException; import java.security.KeyPair; import java.security.PrivateKey; import java.security.cert.Certificate; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageTree; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSigProperties; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.visible.PDVisibleSignDesigner; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; import org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox; import org.apache.pdfbox.pdmodel.interactive.form.PDComboBox; import org.apache.pdfbox.pdmodel.interactive.form.PDField; import 
org.apache.pdfbox.pdmodel.interactive.form.PDNonTerminalField; import org.apache.pdfbox.pdmodel.interactive.form.PDPushButton; import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; import org.bouncycastle.cert.X509CertificateHolder; import org.bouncycastle.cms.CMSException; import org.bouncycastle.cms.CMSProcessableByteArray; import org.bouncycastle.cms.CMSSignedData; import org.bouncycastle.cms.CMSSignedDataGenerator; import org.bouncycastle.cms.jcajce.JcaSignerInfoGeneratorBuilder; import org.bouncycastle.operator.ContentSigner; import org.bouncycastle.operator.OperatorCreationException; import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder; import org.bouncycastle.operator.jcajce.JcaDigestCalculatorProviderBuilder; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.util.CollectionUtils; import com.formkiq.core.form.JSONService; import com.formkiq.core.form.bean.ObjectBuilder; import com.formkiq.core.form.dto.ArchiveDTO; import com.formkiq.core.form.dto.FormJSON; import com.formkiq.core.form.dto.FormJSONField; import com.formkiq.core.form.dto.FormJSONRequiredType; import com.formkiq.core.form.dto.FormJSONSection; import com.formkiq.core.form.dto.WorkflowOutput; import com.formkiq.core.form.dto.WorkflowOutputDocument; import com.formkiq.core.form.dto.WorkflowOutputFormField; import com.formkiq.core.form.dto.WorkflowOutputPdfForm; import com.formkiq.core.service.FormBuiltInObjectBuilder; import com.formkiq.core.service.generator.WorkflowOutputGenerator; import com.formkiq.core.service.generator.pdfbox.PDFieldSearchRectangle.PDFieldAreaSearch; import com.formkiq.core.service.propertystore.PropertyStore; import com.formkiq.core.util.CollectionUtil; import com.formkiq.core.util.Strings; /** * Implementation of {@link WorkflowOutputGenerator} using * https://pdfbox.apache.org. * */ public class PdfEditorServiceImpl implements WorkflowOutputGenerator, SignatureInterface { /** Logger. 
*/
    private static final Logger LOG = Logger.getLogger(PdfEditorServiceImpl.class.getName());

    /** {@link PdfTextFieldMatchComparator}. */
    private PdfTextFieldMatchComparator matchComparator = new PdfTextFieldMatchComparator();

    /** {@link TextSearchAreaFilter} sorted in priority. */
    private List<TextSearchAreaFilter> textSearchFilters = Arrays.asList(new TextSearchAreaFilterInsideLines(),
            new TextSearchAreaFilterDefault());

    /** {@link JSONService}. */
    @Autowired
    private JSONService jsonService;

    /** {@link PropertyStore}. */
    @Autowired
    private PropertyStore propertyStore;

    /**
     * Add Field to Page Map.
     * <p>
     * Non-terminal fields are walked recursively; a terminal field is filed
     * under the page of its first widget, and skipped when it has no widgets.
     *
     * @param objMap {@link Map} of {@link COSDictionary} objects by
     * Page Number.
     * @param field {@link PDField}
     * @param map {@link Map} of page number to the fields found on that page
     * @throws IOException IOException
     */
    private void addFieldToPageMap(final Map<COSDictionary, Integer> objMap, final PDField field,
            final Map<Integer, List<PDField>> map) throws IOException {

        if (field instanceof PDNonTerminalField) {
            for (PDField child : ((PDNonTerminalField) field).getChildren()) {
                addFieldToPageMap(objMap, child, map);
            }
            return;
        }

        if (CollectionUtils.isEmpty(field.getWidgets())) {
            LOG.log(Level.FINE, "skip addFieldToPageMap='" + field.getFullyQualifiedName() + "',class="
                    + field.getClass().getName());
            return;
        }

        LOG.log(Level.FINE,
                "addFieldToPageMap='" + field.getFullyQualifiedName() + "',class=" + field.getClass().getName());

        map.computeIfAbsent(getPageNumber(objMap, field), key -> new ArrayList<>()).add(field);
    }

    @Override
    public void addOutputDocument(final ArchiveDTO archive, final WorkflowOutput wo) throws IOException {

        WorkflowOutputPdfForm output = (WorkflowOutputPdfForm) wo;
        String pdfname = output.getName();

        byte[] docBytes = archive.getPdf(pdfname + ".pdf");

        // the full save reports whether signature fields exist; those are
        // applied afterwards on the freshly saved bytes
        if (dofullPDFSave(docBytes, archive, output)) {
            docBytes = archive.getPdf(pdfname + ".pdf");
            doSignaturePdfSave(docBytes, archive, output);
        }
    }

    /**
     * Do Partial PDF Save. This save works when updating Signature fields.
     * <p>
     * The document is only re-saved (incrementally) when at least one
     * signature was applied. A PDF without an AcroForm is left untouched.
     *
     * @param docBytes byte[]
     * @param archive {@link ArchiveDTO}
     * @param output {@link WorkflowOutputPdfForm}
     * @throws IOException IOException
     */
    private void doSignaturePdfSave(final byte[] docBytes, final ArchiveDTO archive,
            final WorkflowOutputPdfForm output) throws IOException {

        List<SignatureOptions> signatureOptions = new ArrayList<>();
        PDDocument doc = loadPDF(docBytes);

        try {

            PDAcroForm pdform = doc.getDocumentCatalog().getAcroForm();
            if (pdform == null) {
                // no interactive form, therefore no signature fields to fill
                return;
            }

            for (WorkflowOutputFormField ofield : output.getFields()) {

                Optional<FormJSON> form = findForm(archive, ofield);
                Optional<FormJSONField> field = findFormField(form, ofield);
                if (!form.isPresent() || !field.isPresent()) {
                    continue;
                }

                PDField pfield = pdform.getField(ofield.getDocumentfieldname());
                if (!(pfield instanceof PDSignatureField)) {
                    continue;
                }

                byte[] bs = form.get().getAssetData().get(field.get().getValue());
                if (bs == null) {
                    continue;
                }

                try {
                    InputStream is = new ByteArrayInputStream(bs);
                    signatureOptions.add(setValue(doc, (PDSignatureField) pfield, is));
                } catch (IllegalStateException e) {
                    LOG.log(Level.WARNING, "unable to set signature", e);
                }
            }

            if (!signatureOptions.isEmpty()) {
                ByteArrayOutputStream bs = new ByteArrayOutputStream();
                doc.saveIncremental(bs);
                archive.addPDF(output.getName() + ".pdf", bs.toByteArray());
            }

        } finally {
            // release the options even when saving failed
            try {
                for (SignatureOptions sigOption : signatureOptions) {
                    IOUtils.closeQuietly(sigOption);
                }
            } finally {
                doc.close();
            }
        }
    }

    /**
     * Do Full PDF Save. This save works when updating the values of fields.
* @param docBytes byte[] * @param archive {@link ArchiveDTO} * @param output {@link WorkflowOutputPdfForm} * @return boolean - whether signature fields are found * @throws IOException IOException */ private boolean dofullPDFSave(final byte[] docBytes, final ArchiveDTO archive, final WorkflowOutputPdfForm output) throws IOException { boolean hasSignatures = false; PDDocument doc = loadPDF(docBytes); try { PDDocumentCatalog docCatalog = doc.getDocumentCatalog(); PDAcroForm pdform = docCatalog.getAcroForm(); for (WorkflowOutputFormField ofield : output.getFields()) { Optional<FormJSON> form = findForm(archive, ofield); Optional<FormJSONField> field = findFormField(form, ofield); if (form.isPresent() && field.isPresent()) { PDField pdfield = pdform.getField(ofield.getDocumentfieldname()); if (pdfield != null) { if (pdfield instanceof PDSignatureField) { hasSignatures = true; } else { String value = field.get().getValue(); List<String> values = field.get().getValues(); if (!isEmpty(values)) { for (String val : values) { pdfield.setValue(extractLabelAndValue(val).getRight()); } } else if (!isEmpty(value)) { value = extractLabelAndValue(value).getRight(); pdfield.setValue(value); } } } } } ByteArrayOutputStream bs = new ByteArrayOutputStream(); doc.save(bs); bs.close(); String pdfname = output.getName(); archive.addPDF(pdfname + ".pdf", bs.toByteArray()); return hasSignatures; } finally { doc.close(); } } /** * Find {@link FormJSONField} from {@link WorkflowOutputFormField}. 
* @param form {@link FormJSON} * @param ofield {@link WorkflowOutputFormField} * @return {@link FormJSONField} */ private Optional<FormJSONField> findFormField(final Optional<FormJSON> form, final WorkflowOutputFormField ofield) { Optional<FormJSONField> result = Optional.empty(); Pair<String, String> field = extractLabelAndValue(ofield.getField()); int id = NumberUtils.toInt(field.getRight(), -1); if (form.isPresent()) { result = findField(form.get(), id); } return result; } /** * Find {@link FormJSON} from {@link WorkflowOutputFormField}. * @param archive {@link ArchiveDTO} * @param ofield {@link WorkflowOutputFormField} * @return {@link FormJSON} */ private Optional<FormJSON> findForm(final ArchiveDTO archive, final WorkflowOutputFormField ofield) { Pair<String, String> pair = extractLabelAndValue(ofield.getForm()); FormJSON form = archive.getForm(pair.getRight()); return form != null ? Optional.of(form) : Optional.empty(); } /** * Sets value of {@link PDSignatureField}. * @param doc {@link PDDocument} * @param field {@link PDSignatureField} * @param signatureInputStream {@link InputStream} * @return {@link SignatureOptions} * @throws IOException IOException */ private SignatureOptions setValue(final PDDocument doc, final PDSignatureField field, final InputStream signatureInputStream) throws IOException { int accessPermissions = SigUtils.getMDPPermission(doc); if (accessPermissions == 1) { throw new IllegalStateException("No changes to the document are " + "permitted due to DocMDP transform parameters " + "dictionary"); } // retrieve signature dictionary PDSignature signature = field.getSignature(); if (signature == null) { signature = new PDSignature(); // after solving PDFBOX-3524 - signatureField.setValue(signature) // until then: field.getCOSObject().setItem(COSName.V, signature); } else { throw new IllegalStateException( "The signature field " + field.getFullyQualifiedName() + " is already signed."); } // Optional: certify // can be done only if version is 
at least 1.5 and if not already set // doing this on a PDF/A-1b file fails validation by Adobe // preflight (PDFBOX-3821) // PDF/A-1b requires PDF version 1.4 max, so don't increase the version // on such files. final float version = 1.5f; if (doc.getVersion() >= version && accessPermissions == 0) { SigUtils.setMDPPermission(doc, signature, 2); } PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(); if (acroForm != null && acroForm.getNeedAppearances()) { // PDFBOX-3738 NeedAppearances true results in visible signature // becoming invisible // with Adobe Reader if (acroForm.getFields().isEmpty()) { // we can safely delete it if there are no fields acroForm.getCOSObject().removeItem(COSName.NEED_APPEARANCES); // note that if you've set MDP permissions, the removal of this // item // may result in Adobe Reader claiming that the document has // been changed. // and/or that field content won't be displayed properly. // ==> decide what you prefer and adjust your code accordingly. } } // default filter signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); // subfilter for basic and PAdES Part 2 signatures signature.setSubFilter(PDSignature.SUBFILTER_ADBE_PKCS7_DETACHED); PDVisibleSignDesigner visibleSignDesigner = new PDVisibleSignDesigner(signatureInputStream); PDVisibleSigProperties visibleSigProps = new PDVisibleSigProperties(); visibleSigProps // .signerName(name) // TODO add.. // .signerLocation(location) // TODO add. 
// .signatureReason(reason) // .preferredSize(preferredSize) // .page(0) // TODO fix .visualSignEnabled(true).setPdVisibleSignature(visibleSignDesigner); visibleSigProps.buildSignature(); signature.setName(visibleSigProps.getSignerName()); signature.setLocation(visibleSigProps.getSignerLocation()); signature.setReason(visibleSigProps.getSignatureReason()); // the signing date, needed for valid signature signature.setSignDate(Calendar.getInstance()); SignatureOptions sigOptions = new SignatureOptions(); sigOptions.setVisualSignature(visibleSigProps.getVisibleSignature()); sigOptions.setPage(visibleSigProps.getPage() - 1); doc.addSignature(signature, this, sigOptions); return sigOptions; } /** * Build {@link FormJSON} from {@link PDDocument}. * @param doc {@link PDDocument} * @param texts {@link List} of {@link PdfTextField} * @return {@link FormJSON} */ private FormJSON buildFormJSON(final PDDocument doc, final List<PdfTextField> texts) { String title = doc.getDocumentInformation().getTitle(); if (isEmpty(title)) { float maxFont = texts.stream().map(s -> Float.valueOf(s.getFontSize())).max(Float::compare).get() .floatValue(); StringBuilder sb = new StringBuilder(); for (PdfTextField text : texts) { if ((int) text.getFontSize() == (int) maxFont) { sb.append(text.getText() + " "); } } title = sb.toString(); } title = isEmpty(title) ? "Untitled" : title.trim(); FormJSON form = ObjectBuilder.buildFormJSON(title); return form; } /** * Build {@link FormJSONSection}. 
* * @param form {@link FormJSON} * @param page {@link PDPage} * @param fields {@link List} of {@link PDField} * @param texts {@link List} of {PdfTextField} * @param lineRects {@link List} of {@link PDRectangle} * @return {@link Map} of {@link PDPage} to {@link PdfTextField} */ private Map<PDField, FormJSONField> buildFormSection(final FormJSON form, final PDPage page, final List<PDField> fields, final List<PdfTextField> texts, final List<PDRectangle> lineRects) { Map<UUID, List<PdfTextField>> relatedTextMap = getRelatedTextMap(texts); List<PdfFieldMatch> matches = generateMatches(page, fields, texts, lineRects); matchTextToPdField(matches); removeMatchedTexts(matches, texts); int avgFont = getAverageFontSize(texts) + 1; List<PdfTextField> headers = findSectionHeaders(texts, avgFont); return createFormFields(form, matches, headers, relatedTextMap, avgFont); } /** * Removed Matched Text. * @param matches {@link List} of {@link PdfFieldMatch} * @param texts {@link List} of {@link PdfTextField} */ private void removeMatchedTexts(final List<PdfFieldMatch> matches, final List<PdfTextField> texts) { for (PdfFieldMatch t : matches) { for (PdfTextFieldMatch m : t.getMatches()) { texts.remove(m.getTextField()); } for (PdAnnotationWidgetMatch w : t.getWidgets()) { for (PdfTextFieldMatch m : w.getMatches()) { texts.remove(m.getTextField()); } } } } /** * Create {@link List} of {@link WorkflowOutputPdfFormField}. 
* @param form {@link FormJSON} * @param fields {@link List} of {@link PDField} * @param map {@link Map} of {@link PDField} to {@link FormJSONField} * @return {@link List} of {@link WorkflowOutputPdfFormField} */ private List<WorkflowOutputFormField> createFieldOutputs(final FormJSON form, final List<PDField> fields, final Map<PDField, FormJSONField> map) { List<WorkflowOutputFormField> list = new ArrayList<>(); for (PDField pdfield : fields) { FormJSONField field = map.get(pdfield); WorkflowOutputFormField wo = buildWorkflowOuputFormField(form, field, pdfield.getFullyQualifiedName()); list.add(wo); } return list; } /** * Create {@link FormJSONField} from {@link PDField}. * * @param pdffield {@link PdfFieldMatch} * @param relatedTextMap {@link Map} of {@link PdfTextField} * @return {@link FormJSONField} */ private FormJSONField createFormField(final PdfFieldMatch pdffield, final Map<UUID, List<PdfTextField>> relatedTextMap) { FormJSONField field = new FormJSONField(); field.setRequired(FormJSONRequiredType.OPTIONAL); PDField pdfield = pdffield.getField(); field.setLabel(getLabel(pdffield, relatedTextMap)); if (pdfield instanceof PDComboBox) { PDComboBox box = (PDComboBox) pdfield; field.setType(SELECTBOX); field.setOptions(box.getOptions()); } else if (pdfield instanceof PDCheckBox) { PDCheckBox c = (PDCheckBox) pdfield; field.setOptions(c.getOnValues().stream().map(s -> s + "[" + s + "]").collect(Collectors.toList())); if (pdffield.getWidgets().size() == c.getOnValues().size()) { List<String> options = field.getOptions(); for (int i = 0; i < options.size(); i++) { PdAnnotationWidgetMatch wm = pdffield.getWidgets().get(i); if (!wm.getMatches().isEmpty()) { String v = extractLabelAndValue(options.get(i)).getRight(); options.set(i, wm.getMatches().get(0).getTextField().getText().trim() + "[" + v + "]"); } } } field.setType(SWITCH); } else if (pdfield instanceof PDSignatureField) { field.setType(SIGNATURE); } else { field.setType(TEXTBOX); } return field; } /** * 
Transfers {@link PDField} to {@link FormJSONField}. * * @param form {@link FormJSON} * @param pdfields {@link List} of {@link PdfFieldMatch} * @param headers {@link List} of {@link PdfTextField} * @param relatedTextMap {@link Map} of {@link PdfTextField} * @param avgFont int * @return {@link List} */ private Map<PDField, FormJSONField> createFormFields(final FormJSON form, final List<PdfFieldMatch> pdfields, final List<PdfTextField> headers, final Map<UUID, List<PdfTextField>> relatedTextMap, final int avgFont) { int id = getNextFormJSONFieldId(form); boolean first = true; PdfTextField lastheader = null; FormJSONSection section = buildFormJSONSection(null, null); Map<PDField, FormJSONField> map = new HashMap<>(); Iterator<PdfFieldMatch> itr = pdfields.iterator(); while (itr.hasNext()) { PdfFieldMatch pdfield = itr.next(); PdfTextField header = findClosestHeader(headers, pdfield, avgFont); if (first || (lastheader != null && !lastheader.equals(header))) { String title = null; if (header != null) { title = header.getText().replaceAll("\t", " "); } section = buildFormJSONSection(title, new ArrayList<>()); form.getSections().add(section); lastheader = header; first = false; } FormJSONField field = createFormField(pdfield, relatedTextMap); field.setId(id++); section.getFields().add(field); if (pdfield.getField() != null) { map.put(pdfield.getField(), field); } } return map; } /** * Find Header for {@link PdfFieldMatch}. 
* @param headers {@link List} of {@link PdfTextField} * @param pdfield {@link PdfFieldMatch} * @param fontSize float * @return {@link PdfTextField} */ private PdfTextField findClosestHeader(final List<PdfTextField> headers, final PdfFieldMatch pdfield, final float fontSize) { PdfTextField match = null; PDRectangle rect = pdfield.getWidgets().get(0).getWidget().getRectangle(); for (PdfTextField tf : headers) { if (fontSize == -1 || tf.getFontSize() >= fontSize) { if (tf.getRectangle().getLowerLeftY() > rect.getLowerLeftY()) { match = tf; } else { break; } } } return match; } /** * Find Possible Matches for {@link PdfTextField} and sorted by best match. * @param rect {@link PDRectangle} * @param fields {@link List} * @param searchAreas {@link List} of {@link PDFieldSearchRectangle} * @return {@link List} */ private List<PdfTextFieldMatch> findPossibleMatches(final PDRectangle rect, final List<PdfTextField> fields, final List<PDFieldSearchRectangle> searchAreas) { List<PdfTextFieldMatch> rects = new ArrayList<>(); List<PdfTextFieldMatch> list = new ArrayList<>(); for (PdfTextField field : fields) { float match = getDistanceBetween(rect, field.getRectangle()); for (PDFieldSearchRectangle sa : searchAreas) { if (isIntersection(field.getRectangle(), sa.getRectangle())) { if (PDFieldAreaSearch.RECTANGLE.equals(sa.getType())) { addIfNotNull(rects, new PdfTextFieldMatch(match, field, sa.getType())); } else { addIfNotNull(list, new PdfTextFieldMatch(match, field, sa.getType())); } } } } list = !rects.isEmpty() ? rects : list; Collections.sort(list, this.matchComparator); return list; } /** * Find Section Headers. 
* @param texts {@link List} of {@link PdfTextField} * @param avgFont int * @return {@link List} */ private List<PdfTextField> findSectionHeaders(final List<PdfTextField> texts, final int avgFont) { List<PdfTextField> headers = new ArrayList<>(); if (avgFont > 0) { LOG.log(Level.FINE, "-> header section avg font size " + avgFont); for (PdfTextField tf : texts) { String text = tf.getText(); if (tf.getFontSize() > avgFont && Strings.hasAtLeast1Letter(text) && text.length() > 1) { LOG.log(Level.FINE, "-> found possible header section " + tf.getText()); headers.add(tf); } } } return headers; } /** * Find Text for {@link PDAnnotationWidget}. * @param widget {@link PDAnnotationWidget} * @param fields {@link List} * @param searchAreas {@link List} of {@link PDFieldSearchRectangle} * @return {@link PdAnnotationWidgetMatch} */ private List<PdfTextFieldMatch> findTextForWidget(final PDAnnotationWidget widget, final List<PdfTextField> fields, final List<PDFieldSearchRectangle> searchAreas) { PDRectangle rect = widget.getRectangle(); List<PdfTextFieldMatch> matches = findPossibleMatches(rect, fields, searchAreas); return matches; } /** * Generate Matches between {@link PDField} and PDF Text. 
* * @param page {@link PDPage} * @param pdFields {@link List} of {@link PDField} * @param textOnPage {@link List} of {@link PdfTextField} * @param lineRects {@link List} of {@link PDRectangle} * @return {@link List} */ private List<PdfFieldMatch> generateMatches(final PDPage page, final List<PDField> pdFields, final List<PdfTextField> textOnPage, final List<PDRectangle> lineRects) { List<PdfFieldMatch> list = new ArrayList<>(); for (PDField pdField : pdFields) { PdfFieldMatch match = new PdfFieldMatch(); match.setField(pdField); match.setWidgets(new ArrayList<>()); match.setMatches(new ArrayList<>()); list.add(match); for (PDAnnotationWidget w : pdField.getWidgets()) { PdAnnotationWidgetMatch m = new PdAnnotationWidgetMatch(); m.setWidget(w); match.getWidgets().add(m); List<PDFieldSearchRectangle> searchAreas = getTextSearchArea(page, pdField, w, lineRects); List<PdfTextFieldMatch> matches = findTextForWidget(w, textOnPage, searchAreas); m.setMatches(matches); } updatePDCheckBoxMatches(pdField, match); } return list; } /** * Adjust Matches for {@link PDCheckBox} fields. 
* * @param pdField {@link PDField} * @param match {@link PdfFieldMatch} */ private void updatePDCheckBoxMatches(final PDField pdField, final PdfFieldMatch match) { if (pdField instanceof PDCheckBox) { PDCheckBox c = (PDCheckBox) pdField; List<String> onValues = c.getOnValues().stream().map(s -> s.trim().toUpperCase()) .collect(Collectors.toList()); if (pdField.getWidgets().size() == onValues.size() && onValues.size() > 1) { List<PdfTextFieldMatch> bestmatch = new ArrayList<>(); for (int i = 0; i < match.getWidgets().size(); i++) { PdAnnotationWidgetMatch w = match.getWidgets().get(i); String onvalue = onValues.get(i).trim().toUpperCase(); // check to see if 1st match matches Checkbox Value if (!w.getMatches().isEmpty() && (w.getMatches().get(0).getTextField().getText().toUpperCase().startsWith(onvalue)) || w.getMatches().get(0).getTextField().getText().toUpperCase().endsWith(onvalue)) { bestmatch.add(w.getMatches().get(0)); // check to see if 2nd match matches Checkbox Value } else if (w.getMatches().size() > 1 && (w.getMatches().get(1).getTextField().getText().toUpperCase().startsWith(onvalue)) || w.getMatches().get(1).getTextField().getText().toUpperCase().endsWith(onvalue)) { bestmatch.add(w.getMatches().get(1)); } } if (bestmatch.size() == onValues.size()) { for (PdfTextFieldMatch bm : bestmatch) { bm.setMatch(0); } for (PdAnnotationWidgetMatch m : match.getWidgets()) { Collections.sort(m.getMatches(), this.matchComparator); } } } } } /** * Get the average Font size. * @param texts {@link List} of {@link PdfTextField} * @return int */ private int getAverageFontSize(final List<PdfTextField> texts) { int fontsize = 0; for (PdfTextField tf : texts) { fontsize += Math.round(tf.getFontSize()); } return !texts.isEmpty() ? fontsize / texts.size() : -1; } /** * Generate {@link Map} {@link COSDictionary} to Page Numbers. 
* @param doc {@link PDDocument} * @return {@link Map} of {@link COSDictionary} to {@link Integer} * @throws IOException IOException */ private Map<COSDictionary, Integer> getCOSDictionaryToPageNumberMap(final PDDocument doc) throws IOException { Map<COSDictionary, Integer> map = new HashMap<>(); PDPageTree pages = doc.getPages(); for (int i = 0; i < pages.getCount(); i++) { for (PDAnnotation annotation : pages.get(i).getAnnotations()) { COSDictionary annotationObject = annotation.getCOSObject(); map.put(annotationObject, Integer.valueOf(i)); } } return map; } /** * Returns Label from {@link PDField}. * * @param pdffield {@link PdfFieldMatch} * @param relatedTextMap {@link Map} of {@link PdfTextField} * @return {@link String} */ private String getLabel(final PdfFieldMatch pdffield, final Map<UUID, List<PdfTextField>> relatedTextMap) { List<PdfTextFieldMatch> matches = pdffield.getMatches(); String text = !matches.isEmpty() ? matches.get(0).getTextField().getText().trim() : null; if (!matches.isEmpty()) { PdfTextFieldMatch match = matches.get(0); PdfTextField tf = match.getTextField(); if (relatedTextMap.containsKey(tf.getUUID())) { text = relatedTextMap.get(tf.getUUID()).stream().map(f -> f.getText().trim()) .collect(Collectors.joining(" ")).trim(); } } if (isEmpty(text)) { text = pdffield.getField().getFullyQualifiedName(); } return text; } @Override public WorkflowOutputDocument getNewWorkflowOutputDocument() { return new WorkflowOutputPdfForm(); } @Override public Pair<FormJSON, List<WorkflowOutputFormField>> getOutputFormFields(final String filename, final byte[] data) throws IOException { List<WorkflowOutputFormField> wofields = new ArrayList<>(); PDDocument doc = loadPDF(data); try { Map<COSDictionary, Integer> obMap = getCOSDictionaryToPageNumberMap(doc); Map<Integer, List<PDField>> pdMap = getPDFields(doc, obMap); Map<Integer, List<PdfTextField>> textsMap = getTextMap(doc); PDPageTree pages = doc.getDocumentCatalog().getPages(); FormJSON form = 
buildFormJSON(doc, textsMap.get(Integer.valueOf(0))); for (int i = 0; i < pages.getCount(); i++) { PDPage page = pages.get(i); Integer pageNum = Integer.valueOf(i); List<PDField> fields = pdMap.getOrDefault(pageNum, emptyList()); List<PdfTextField> texts = getTextForPage(textsMap, pageNum); List<PDRectangle> lineRects = getPageLinePaths(pages.get(i)); Map<PDField, FormJSONField> fieldMap = buildFormSection(form, page, fields, texts, lineRects); List<WorkflowOutputFormField> outfields = createFieldOutputs(form, fields, fieldMap); wofields.addAll(outfields); } return Pair.of(form, wofields); } finally { doc.close(); } } /** * Get the Page Line Paths. * @param page {@link PDPage} * @return {@link List} of {@link PDRectangle} */ private List<PDRectangle> getPageLinePaths(final PDPage page) { List<PDRectangle> rects = new ArrayList<>(); try { PdfLinePaths linePaths = new PdfLinePaths(page); linePaths.processPage(); List<GeneralPath> lines = linePaths.getLinePaths(); for (GeneralPath gp : lines) { Rectangle r = gp.getBounds(); rects.add(new PDRectangle(r.x, r.y, r.width, r.height)); } } catch (IOException e) { LOG.log(Level.WARNING, "Unable to find Page Line Paths", e); rects = Collections.emptyList(); } return rects; } /** * Get Page Number for {@link PDField}. * @param objMap {@link Map} of {@link COSDictionary} and Page Number * @param field {@link PDField} * @return {@link Integer} */ private Integer getPageNumber(final Map<COSDictionary, Integer> objMap, final PDField field) { return getPageNumber(objMap, field, field.getWidgets().get(0)); } /** * Get Page Number for {@link PDField}. 
* @param objMap {@link Map} of {@link COSDictionary} and Page Number * @param field {@link PDField} * @param widget {@link PDAnnotationWidget} * @return {@link Integer} */ private Integer getPageNumber(final Map<COSDictionary, Integer> objMap, final PDField field, final PDAnnotationWidget widget) { COSDictionary cosObject = widget.getCOSObject(); if (objMap.containsKey(cosObject)) { return objMap.get(cosObject); } throw new PdfConversionException("cannot find page for " + field.getFullyQualifiedName()); } /** * Take {@link PDField} objects from {@link PDDocument} * and create {@link PDField} from them. * * @param doc {@link PDDocument} * @param objMap {@link Map} of {@link COSDictionary} and Page Number * @return {@link Map} of {@link PDField} by page number * @throws IOException IOException */ private Map<Integer, List<PDField>> getPDFields(final PDDocument doc, final Map<COSDictionary, Integer> objMap) throws IOException { PDDocumentCatalog dc = doc.getDocumentCatalog(); PDAcroForm pdform = dc.getAcroForm(); Map<Integer, List<PDField>> map = new HashMap<>(); for (PDField field : pdform.getFields()) { if (field instanceof PDPushButton) { LOG.log(Level.FINE, "skip addFieldToPageMap='" + field.getFullyQualifiedName() + "',class=" + field.getClass().getName()); continue; } addFieldToPageMap(objMap, field, map); } for (Map.Entry<Integer, List<PDField>> e : map.entrySet()) { Collections.sort(e.getValue(), new PDFieldComparator()); } return map; } /** * Gets the Text for Page and join really close text together. 
* @param texts {@link List} of {@link PdfTextField} * @return {@link Map} of {@link UUID} and {@link PdfTextField} */ private Map<UUID, List<PdfTextField>> getRelatedTextMap(final List<PdfTextField> texts) { List<List<PdfTextField>> list = CollectionUtil.groupBy(texts, new PdfTextFieldTextXYComparator(), new PdfTextFieldTextGroupComparator()); Map<UUID, List<PdfTextField>> relatedTextMap = new HashMap<>(); for (List<PdfTextField> tomerge : list) { if (tomerge.size() > 1) { for (PdfTextField tf : tomerge) { relatedTextMap.put(tf.getUUID(), tomerge); } } } return relatedTextMap; } /** * Gets Text for Page and merges text together where appropriate. * * @param textsMap * {@link Map} of Page Number and {@link List} of * {@link PdfTextField} * @param pageNum * {@link Integer} * @return {@link List} of {@link PdfTextField} */ private List<PdfTextField> getTextForPage(final Map<Integer, List<PdfTextField>> textsMap, final Integer pageNum) { List<PdfTextField> texts = new ArrayList<>(textsMap.getOrDefault(pageNum, emptyList())); texts = joinHorizontalSimilarText(texts); return texts; } /** * Get a Text Map from {@link PDDocument}. * @param doc {@link PDDocument} * @return {@link Map} * @throws IOException IOException */ public Map<Integer, List<PdfTextField>> getTextMap(final PDDocument doc) throws IOException { TextToPDFieldMapper mapper = new TextToPDFieldMapper(); mapper.setSortByPosition(true); mapper.getText(doc); Map<Integer, List<PdfTextField>> texts = mapper.getTextLocations(); return texts; } /** * Finds the Search area to look for {@link PDField} Text Label. 
* * @param page {@link PDPage} * @param pdField {@link PDField} * @param widget {@link PDAnnotationWidget} * @param lineRects {@link List} of {@link PDRectangle} * @return {@link List} of {@link PDFieldSearchRectangle} */ private List<PDFieldSearchRectangle> getTextSearchArea(final PDPage page, final PDField pdField, final PDAnnotationWidget widget, final List<PDRectangle> lineRects) { List<PDFieldSearchRectangle> area = new ArrayList<>(); for (TextSearchAreaFilter filter : this.textSearchFilters) { area.addAll(filter.getTextSearchArea(page, pdField, widget, lineRects)); } return area; } @Override public boolean isSupported(final WorkflowOutput wo) { return wo instanceof WorkflowOutputPdfForm; } /** * Join similar horizontal text together. * @param texts {@link List} of {@link PdfTextField} * @return {@link List} of {@link PdfTextField} */ private List<PdfTextField> joinHorizontalSimilarText(final List<PdfTextField> texts) { List<PdfTextField> ret = new ArrayList<>(texts); PdfTextFieldTextXYComparator xy = new PdfTextFieldTextXYComparator(); List<List<PdfTextField>> list = CollectionUtil.groupBy(ret, new PdfTextFieldTextYXComparator(), new PdfTextFieldHorizontalGroupComparator()); for (List<PdfTextField> tomerge : list) { if (tomerge.size() > 1) { Collections.sort(tomerge, xy); String s = tomerge.stream().map(p -> p.getText()).collect(Collectors.joining(" ")); PDRectangle rect = calculate( tomerge.stream().map(p -> p.getRectangle()).collect(Collectors.toList())); PdfTextField tf = tomerge.get(0); tf.setText(s); tf.setRectangle(rect); for (int i = 1; i < tomerge.size(); i++) { ret.remove(tomerge.get(i)); } } } return ret; } /** * Load PDDocument. * @param data byte[] * @return {@link PDDocument} * @throws IOException IOException */ public PDDocument loadPDF(final byte[] data) throws IOException { return PDDocument.load(new ByteArrayInputStream(data)); } /** * Create Map of Text to {@link PDAnnotationWidget} that best matches. 
* @param matches {@link List} */ private void matchTextToPdField(final List<PdfFieldMatch> matches) { // sets all widget matches to PdfFieldMatch matches.forEach(t -> t.setMatches(t.getWidgets().stream().flatMap(s -> s.getMatches().stream()).distinct() .collect(Collectors.toList()))); List<PdAnnotationWidgetMatch> widgets = matches.stream().flatMap(s -> s.getWidgets().stream()) .collect(Collectors.toList()); removeWithBetterMatches(widgets); widgets.forEach(t -> t.setMatches( !t.getMatches().isEmpty() ? Arrays.asList(t.getMatches().get(0)) : Collections.emptyList())); for (PdfFieldMatch fm : matches) { if (fm.getField() instanceof PDCheckBox && fm.getWidgets().size() > 1) { updatePDCheckBoxPdfTextFieldMatch(fm); Collections.sort(fm.getMatches(), this.matchComparator); if (!fm.getMatches().isEmpty()) { fm.setMatches(Arrays.asList(fm.getMatches().get(0))); } } else if (!fm.getWidgets().isEmpty()) { fm.setMatches(fm.getWidgets().get(0).getMatches()); } } } /** * Remove {@link PdfFieldMatch} {@link PdfTextFieldMatch} that occur in * the {@link PdAnnotationWidgetMatch}. 
* * @param fm {@link PdfFieldMatch} */ private void updatePDCheckBoxPdfTextFieldMatch(final PdfFieldMatch fm) { Optional<Float> maxX = fm.getWidgets().stream() .map(s -> Float.valueOf(s.getWidget().getRectangle().getUpperRightX())) .collect(Collectors.maxBy(Float::compareTo)); List<PdfTextFieldMatch> list = fm.getWidgets().stream().flatMap(s -> s.getMatches().stream()) .collect(Collectors.toList()); Set<PdfTextField> set = list.stream().map(s -> s.getTextField()).collect(Collectors.toSet()); Iterator<PdfTextFieldMatch> itr = fm.getMatches().iterator(); while (itr.hasNext()) { PdfTextFieldMatch m = itr.next(); if (m.getMatch() < 0 || set.contains(m.getTextField())) { itr.remove(); } else if (maxX.isPresent()) { if (maxX.get().floatValue() < m.getTextField().getRectangle().getLowerLeftX()) { itr.remove(); } } } Collections.sort(list, this.matchComparator); } /** * Remove duplicate {@link PdfTextField} that are better matches. * * @param widgets {@link List} of {@link PdAnnotationWidgetMatch} */ private void removeWithBetterMatches(final List<PdAnnotationWidgetMatch> widgets) { for (PdAnnotationWidgetMatch widget : widgets) { Iterator<PdfTextFieldMatch> itr = widget.getMatches().iterator(); while (itr.hasNext()) { if (hasBetterMatch(itr.next(), widget, widgets)) { itr.remove(); } } } } /** * Has Better {@link PdfTextFieldMatch}. 
* * @param textfield {@link PdfTextFieldMatch} * @param widget {@link PdAnnotationWidgetMatch} * @param widgets {@link List} of {@link PdAnnotationWidgetMatch} * @return boolean */ private boolean hasBetterMatch(final PdfTextFieldMatch textfield, final PdAnnotationWidgetMatch widget, final List<PdAnnotationWidgetMatch> widgets) { for (PdAnnotationWidgetMatch w : widgets) { if (!w.equals(widget)) { for (PdfTextFieldMatch match : w.getMatches()) { if (match.getMatch() < textfield.getMatch() && textfield.getTextField().equals(match.getTextField())) { return true; } } } } return false; } @Override public void postGenerateCallback(final ArchiveDTO archive, final FormJSON form) throws IOException { if (hasFieldType(form, SIGNATURE)) { FormBuiltInObjectBuilder builder = new FormBuiltInObjectBuilder(); FormJSON signatureForm = builder.getSignatureForm(this.jsonService); archive.addStep(signatureForm); List<FormJSONField> sfields = findFieldsByType(form, SIGNATURE); removeFields(form, sfields); for (FormJSONField sigfield : sfields) { signatureForm.getSections().get(0).getFields().add(sigfield); Optional<WorkflowOutputFormField> op = findWorkflowOutputDocument(archive.getWorkflow(), form, sigfield); if (op.isPresent()) { op.get().setForm(signatureForm.getName() + "[" + signatureForm.getUUID() + "]"); } } } } @Override public byte[] sign(final InputStream content) throws IOException { try { KeyPair key = this.propertyStore.getKeyPair(); PrivateKey privKey = key.getPrivate(); Certificate certificate = this.propertyStore.getCertificate(key); CMSSignedDataGenerator gen = new CMSSignedDataGenerator(); org.bouncycastle.asn1.x509.Certificate cert = org.bouncycastle.asn1.x509.Certificate .getInstance(certificate.getEncoded()); ContentSigner sha1Signer = new JcaContentSignerBuilder("SHA256WithRSA").build(privKey); gen.addSignerInfoGenerator( new JcaSignerInfoGeneratorBuilder(new JcaDigestCalculatorProviderBuilder().build()) .build(sha1Signer, new X509CertificateHolder(cert))); 
CMSProcessableByteArray msg = new CMSProcessableByteArray(IOUtils.toByteArray(content)); CMSSignedData signedData = gen.generate(msg, false); return signedData.getEncoded(); } catch (GeneralSecurityException | CMSException | OperatorCreationException e) { throw new IOException(e); } } }