Example usage for org.apache.pdfbox.cos COSDictionary getDictionaryObject

List of usage examples for org.apache.pdfbox.cos COSDictionary getDictionaryObject

Introduction

In this page you can find the example usage for org.apache.pdfbox.cos COSDictionary getDictionaryObject.

Prototype

public COSBase getDictionaryObject(COSName key) 

Source Link

Document

This will get an object from this dictionary.

Usage

From source file:org.apache.tika.parser.pdf.PDFParser.java

License:Apache License

private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException {

    //first extract AccessPermissions
    AccessPermission ap = document.getCurrentAccessPermission();
    metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY,
            Boolean.toString(ap.canExtractForAccessibility()));
    metadata.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(ap.canExtractContent()));
    metadata.set(AccessPermissions.ASSEMBLE_DOCUMENT, Boolean.toString(ap.canAssembleDocument()));
    metadata.set(AccessPermissions.FILL_IN_FORM, Boolean.toString(ap.canFillInForm()));
    metadata.set(AccessPermissions.CAN_MODIFY, Boolean.toString(ap.canModify()));
    metadata.set(AccessPermissions.CAN_MODIFY_ANNOTATIONS, Boolean.toString(ap.canModifyAnnotations()));
    metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(ap.canPrint()));
    metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintDegraded()));

    //now go for the XMP
    org.apache.jempbox.xmp.XMPMetadata xmp = null;
    XMPSchemaDublinCore dcSchema = null;
    XMPSchemaMediaManagement mmSchema = null;
    try {//ww w  .ja  v a  2 s.c  o  m
        if (document.getDocumentCatalog().getMetadata() != null) {
            xmp = document.getDocumentCatalog().getMetadata().exportXMPMetadata();
        }
    } catch (IOException e) {
    }

    if (xmp != null) {
        try {
            dcSchema = xmp.getDublinCoreSchema();
        } catch (IOException e) {
        }

        JempboxExtractor.extractXMPMM(xmp, metadata);
    }

    PDDocumentInformation info = document.getDocumentInformation();
    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
    extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema);
    extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema);
    extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema);
    addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
    addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
    addMetadata(metadata, "producer", info.getProducer());
    extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema);

    // TODO: Move to description in Tika 2.0
    addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
    addMetadata(metadata, "trapped", info.getTrapped());
    try {
        // TODO Remove these in Tika 2.0
        addMetadata(metadata, "created", info.getCreationDate());
        addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
    } catch (IOException e) {
        // Invalid date format, just ignore
    }
    try {
        Calendar modified = info.getModificationDate();
        addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
        addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
    } catch (IOException e) {
        // Invalid date format, just ignore
    }

    // All remaining metadata is custom
    // Copy this over as-is
    List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords",
            "Producer", "Subject", "Title", "Trapped");
    for (COSName key : info.getDictionary().keySet()) {
        String name = key.getName();
        if (!handledMetadata.contains(name)) {
            addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
        }
    }

    //try to get the various versions
    //Caveats:
    //    there is currently a fair amount of redundancy
    //    TikaCoreProperties.FORMAT can be multivalued
    //    There are also three potential pdf specific version keys: pdf:PDFVersion, pdfa:PDFVersion, pdf:PDFExtensionVersion        
    metadata.set("pdf:PDFVersion", Float.toString(document.getDocument().getVersion()));
    metadata.add(TikaCoreProperties.FORMAT.getName(),
            MEDIA_TYPE.toString() + "; version=" + Float.toString(document.getDocument().getVersion()));

    try {
        if (xmp != null) {
            xmp.addXMLNSMapping(XMPSchemaPDFAId.NAMESPACE, XMPSchemaPDFAId.class);
            XMPSchemaPDFAId pdfaxmp = (XMPSchemaPDFAId) xmp.getSchemaByClass(XMPSchemaPDFAId.class);
            if (pdfaxmp != null) {
                if (pdfaxmp.getPart() != null) {
                    metadata.set("pdfaid:part", Integer.toString(pdfaxmp.getPart()));
                }
                if (pdfaxmp.getConformance() != null) {
                    metadata.set("pdfaid:conformance", pdfaxmp.getConformance());
                    String version = "A-" + pdfaxmp.getPart()
                            + pdfaxmp.getConformance().toLowerCase(Locale.ROOT);
                    metadata.set("pdfa:PDFVersion", version);
                    metadata.add(TikaCoreProperties.FORMAT.getName(),
                            MEDIA_TYPE.toString() + "; version=\"" + version + "\"");
                }
            }
            // TODO WARN if this XMP version is inconsistent with document header version?          
        }
    } catch (IOException e) {
        metadata.set(TikaCoreProperties.TIKA_META_PREFIX + "pdf:metadata-xmp-parse-failed", "" + e);
    }
    //TODO: Let's try to move this into PDFBox.
    //Attempt to determine Adobe extension level, if present:
    COSDictionary root = document.getDocumentCatalog().getCOSDictionary();
    COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions"));
    if (extensions != null) {
        for (COSName extName : extensions.keySet()) {
            // If it's an Adobe one, interpret it to determine the extension level:
            if (extName.equals(COSName.getPDFName("ADBE"))) {
                COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName);
                if (adobeExt != null) {
                    String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
                    int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
                    //-1 is sentinel value that something went wrong in getInt
                    if (el != -1) {
                        metadata.set("pdf:PDFExtensionVersion", baseVersion + " Adobe Extension Level " + el);
                        metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\""
                                + baseVersion + " Adobe Extension Level " + el + "\"");
                    }
                }
            } else {
                // WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'.
                metadata.set("pdf:foundNonAdobeExtensionName", extName.getName());
            }
        }
    }
}

From source file:org.apache.tika.parser.pdf.PDFPureJavaParser.java

License:Apache License

private void extractMetadata(PDDocument document, Metadata metadata, ParseContext context)
        throws TikaException {

    //first extract AccessPermissions
    AccessPermission ap = document.getCurrentAccessPermission();
    metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY,
            Boolean.toString(ap.canExtractForAccessibility()));
    metadata.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(ap.canExtractContent()));
    metadata.set(AccessPermissions.ASSEMBLE_DOCUMENT, Boolean.toString(ap.canAssembleDocument()));
    metadata.set(AccessPermissions.FILL_IN_FORM, Boolean.toString(ap.canFillInForm()));
    metadata.set(AccessPermissions.CAN_MODIFY, Boolean.toString(ap.canModify()));
    metadata.set(AccessPermissions.CAN_MODIFY_ANNOTATIONS, Boolean.toString(ap.canModifyAnnotations()));
    metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(ap.canPrint()));
    metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintDegraded()));

    //now go for the XMP
    Document dom = loadDOM(document.getDocumentCatalog().getMetadata(), metadata, context);

    XMPMetadata xmp = null;//from w ww  .ja v a2s .  c om
    if (dom != null) {
        xmp = new XMPMetadata(dom);
    }
    XMPSchemaDublinCore dcSchema = null;

    /*if (xmp != null) {
    try {
        dcSchema = xmp.getDublinCoreSchema();
    } catch (IOException e) {}
            
    JempboxExtractor.extractXMPMM(xmp, metadata);
    }*/

    PDDocumentInformation info = document.getDocumentInformation();
    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
    extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema);
    addMetadata(metadata, PDF.DOC_INFO_TITLE, info.getTitle());
    extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema);
    addMetadata(metadata, PDF.DOC_INFO_CREATOR, info.getAuthor());
    extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema);
    addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
    addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, info.getCreator());
    addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
    addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, info.getKeywords());
    addMetadata(metadata, "producer", info.getProducer());
    addMetadata(metadata, PDF.DOC_INFO_PRODUCER, info.getProducer());
    extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema);

    addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject());

    // TODO: Move to description in Tika 2.0
    addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
    addMetadata(metadata, "trapped", info.getTrapped());
    addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped());
    // TODO Remove these in Tika 2.0
    addMetadata(metadata, "created", info.getCreationDate());
    addMetadata(metadata, PDF.DOC_INFO_CREATED, info.getCreationDate());
    addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
    Calendar modified = info.getModificationDate();
    addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
    addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
    addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, info.getModificationDate());

    // All remaining metadata is custom
    // Copy this over as-is
    List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords",
            "Producer", "Subject", "Title", "Trapped");
    for (COSName key : info.getCOSObject().keySet()) {
        String name = key.getName();
        if (!handledMetadata.contains(name)) {
            addMetadata(metadata, name, info.getCOSObject().getDictionaryObject(key));
            addMetadata(metadata, PDF.PDF_DOC_INFO_CUSTOM_PREFIX + name,
                    info.getCOSObject().getDictionaryObject(key));
        }
    }

    //try to get the various versions
    //Caveats:
    //    there is currently a fair amount of redundancy
    //    TikaCoreProperties.FORMAT can be multivalued
    //    There are also three potential pdf specific version keys: pdf:PDFVersion, pdfa:PDFVersion, pdf:PDFExtensionVersion
    metadata.set(PDF.PDF_VERSION, Float.toString(document.getDocument().getVersion()));
    metadata.add(TikaCoreProperties.FORMAT.getName(),
            MEDIA_TYPE.toString() + "; version=" + Float.toString(document.getDocument().getVersion()));

    try {
        if (xmp != null) {
            xmp.addXMLNSMapping(XMPSchemaPDFAId.NAMESPACE, XMPSchemaPDFAId.class);
            XMPSchemaPDFAId pdfaxmp = (XMPSchemaPDFAId) xmp.getSchemaByClass(XMPSchemaPDFAId.class);
            if (pdfaxmp != null) {
                if (pdfaxmp.getPart() != null) {
                    metadata.set(PDF.PDFAID_PART, Integer.toString(pdfaxmp.getPart()));
                }
                if (pdfaxmp.getConformance() != null) {
                    metadata.set(PDF.PDFAID_CONFORMANCE, pdfaxmp.getConformance());
                    String version = "A-" + pdfaxmp.getPart()
                            + pdfaxmp.getConformance().toLowerCase(Locale.ROOT);
                    metadata.set(PDF.PDFA_VERSION, version);
                    metadata.add(TikaCoreProperties.FORMAT.getName(),
                            MEDIA_TYPE.toString() + "; version=\"" + version + "\"");
                }
            }
            // TODO WARN if this XMP version is inconsistent with document header version?          
        }
    } catch (IOException e) {
        metadata.set(TikaCoreProperties.TIKA_META_PREFIX + "pdf:metadata-xmp-parse-failed", "" + e);
    }
    //TODO: Let's try to move this into PDFBox.
    //Attempt to determine Adobe extension level, if present:
    COSDictionary root = document.getDocumentCatalog().getCOSObject();
    COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions"));
    if (extensions != null) {
        for (COSName extName : extensions.keySet()) {
            // If it's an Adobe one, interpret it to determine the extension level:
            if (extName.equals(COSName.getPDFName("ADBE"))) {
                COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName);
                if (adobeExt != null) {
                    String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
                    int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
                    //-1 is sentinel value that something went wrong in getInt
                    if (el != -1) {
                        metadata.set(PDF.PDF_EXTENSION_VERSION, baseVersion + " Adobe Extension Level " + el);
                        metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\""
                                + baseVersion + " Adobe Extension Level " + el + "\"");
                    }
                }
            } else {
                // WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'.
                metadata.set("pdf:foundNonAdobeExtensionName", extName.getName());
            }
        }
    }
}

From source file:org.apache.tika.parser.pdf18.PDFParser.java

License:Apache License

private void extractMetadata(PDDocument document, Metadata metadata) throws TikaException {

    //first extract AccessPermissions
    AccessPermission ap = document.getCurrentAccessPermission();
    metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY,
            Boolean.toString(ap.canExtractForAccessibility()));
    metadata.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(ap.canExtractContent()));
    metadata.set(AccessPermissions.ASSEMBLE_DOCUMENT, Boolean.toString(ap.canAssembleDocument()));
    metadata.set(AccessPermissions.FILL_IN_FORM, Boolean.toString(ap.canFillInForm()));
    metadata.set(AccessPermissions.CAN_MODIFY, Boolean.toString(ap.canModify()));
    metadata.set(AccessPermissions.CAN_MODIFY_ANNOTATIONS, Boolean.toString(ap.canModifyAnnotations()));
    metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(ap.canPrint()));
    metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintDegraded()));

    //now go for the XMP
    org.apache.jempbox.xmp.XMPMetadata xmp = null;
    XMPSchemaDublinCore dcSchema = null;
    XMPSchemaMediaManagement mmSchema = null;
    try {//from  w  ww  . jav a  2  s . com
        if (document.getDocumentCatalog().getMetadata() != null) {
            xmp = document.getDocumentCatalog().getMetadata().exportXMPMetadata();
        }
    } catch (IOException e) {
    }

    if (xmp != null) {
        try {
            dcSchema = xmp.getDublinCoreSchema();
        } catch (IOException e) {
        }

        JempboxExtractor.extractXMPMM(xmp, metadata);
    }

    PDDocumentInformation info = document.getDocumentInformation();
    metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
    extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema);
    extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema);
    extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema);
    addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
    addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
    addMetadata(metadata, "producer", info.getProducer());
    extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema);

    // TODO: Move to description in Tika 2.0
    addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
    addMetadata(metadata, "trapped", info.getTrapped());
    try {
        // TODO Remove these in Tika 2.0
        addMetadata(metadata, "created", info.getCreationDate());
        addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
    } catch (IOException e) {
        // Invalid date format, just ignore
    }
    try {
        Calendar modified = info.getModificationDate();
        addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
        addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
    } catch (IOException e) {
        // Invalid date format, just ignore
    }

    // All remaining metadata is custom
    // Copy this over as-is
    List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate", "Keywords",
            "Producer", "Subject", "Title", "Trapped");
    for (COSName key : info.getDictionary().keySet()) {
        String name = key.getName();
        if (!handledMetadata.contains(name)) {
            addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
        }
    }

    //try to get the various versions
    //Caveats:
    //    there is currently a fair amount of redundancy
    //    TikaCoreProperties.FORMAT can be multivalued
    //    There are also three potential pdf specific version keys: pdf:PDFVersion, pdfa:PDFVersion, pdf:PDFExtensionVersion
    metadata.set("pdf:PDFVersion", Float.toString(document.getDocument().getVersion()));
    metadata.add(TikaCoreProperties.FORMAT.getName(),
            MEDIA_TYPE.toString() + "; version=" + Float.toString(document.getDocument().getVersion()));

    try {
        if (xmp != null) {
            xmp.addXMLNSMapping(XMPSchemaPDFAId.NAMESPACE, XMPSchemaPDFAId.class);
            XMPSchemaPDFAId pdfaxmp = (XMPSchemaPDFAId) xmp.getSchemaByClass(XMPSchemaPDFAId.class);
            if (pdfaxmp != null) {
                if (pdfaxmp.getPart() != null) {
                    metadata.set("pdfaid:part", Integer.toString(pdfaxmp.getPart()));
                }
                if (pdfaxmp.getConformance() != null) {
                    metadata.set("pdfaid:conformance", pdfaxmp.getConformance());
                    String version = "A-" + pdfaxmp.getPart()
                            + pdfaxmp.getConformance().toLowerCase(Locale.ROOT);
                    metadata.set("pdfa:PDFVersion", version);
                    metadata.add(TikaCoreProperties.FORMAT.getName(),
                            MEDIA_TYPE.toString() + "; version=\"" + version + "\"");
                }
            }
            // TODO WARN if this XMP version is inconsistent with document header version?
        }
    } catch (IOException e) {
        metadata.set(TikaCoreProperties.TIKA_META_PREFIX + "pdf:metadata-xmp-parse-failed", "" + e);
    }
    //TODO: Let's try to move this into PDFBox.
    //Attempt to determine Adobe extension level, if present:
    COSDictionary root = document.getDocumentCatalog().getCOSDictionary();
    COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions"));
    if (extensions != null) {
        for (COSName extName : extensions.keySet()) {
            // If it's an Adobe one, interpret it to determine the extension level:
            if (extName.equals(COSName.getPDFName("ADBE"))) {
                COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName);
                if (adobeExt != null) {
                    String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
                    int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
                    //-1 is sentinel value that something went wrong in getInt
                    if (el != -1) {
                        metadata.set("pdf:PDFExtensionVersion", baseVersion + " Adobe Extension Level " + el);
                        metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\""
                                + baseVersion + " Adobe Extension Level " + el + "\"");
                    }
                }
            } else {
                // WARN that there is an Extension, but it's not Adobe's, and so is a 'new' format'.
                metadata.set("pdf:foundNonAdobeExtensionName", extName.getName());
            }
        }
    }
}

From source file:org.kuali.kra.printing.service.impl.PersonSignatureServiceImpl.java

License:Educational Community License

/**
 * This method is to remove interactive fields from the form.
 * @param pdfBytes//w  w  w .  j  a  va  2  s . co m
 * @return
 * @throws Exception
 */
protected ByteArrayOutputStream getFlattenedPdfForm(byte[] pdfBytes) throws Exception {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    InputStream is = new ByteArrayInputStream(pdfBytes);
    PDDocument pdDoc = PDDocument.load(is);
    PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
    PDAcroForm acroForm = pdCatalog.getAcroForm();
    COSDictionary acroFormDict = acroForm.getDictionary();
    COSArray fields = (COSArray) acroFormDict.getDictionaryObject("Fields");
    fields.clear();
    pdDoc.save(byteArrayOutputStream);
    return byteArrayOutputStream;
}

From source file:org.lockss.pdf.pdfbox.PdfBoxPage.java

License:Open Source License

@Override
public List<PdfToken> getAnnotations() {
    /*//ww w. j a v  a2s .  c o  m
     * IMPLEMENTATION NOTE
     * 
     * Annotations are just dictionaries, but because there are many
     * types, the PDFBox API defines a vast hierarchy of objects to
     * represent them. At this time, this is way too much detail for
     * this API, because only one type of annotation has a foreseeable
     * use case (the Link type). So for now, we are only representing
     * annotations as the dictionaries they are by circumventing the
     * PDAnnotation factory call in getAnnotations() (PDFBox 1.6.0:
     * PDPage line 780).
     */
    COSDictionary pageDictionary = pdPage.getCOSDictionary();
    COSArray annots = (COSArray) pageDictionary.getDictionaryObject(COSName.ANNOTS);
    if (annots == null) {
        return new ArrayList<PdfToken>();
    }
    return PdfBoxTokens.getArray(annots);
}

From source file:org.mustangproject.ZUGFeRD.ZUGFeRDExporter.java

License:Open Source License

/**
 * Embeds an external file (generic - any type allowed) in the PDF.
 *
 * @param doc          PDDocument to attach the file to.
 * @param filename     name of the file that will become attachment name in the PDF
 * @param relationship how the file relates to the content, e.g. "Alternative"
 * @param description  Human-readable description of the file content
 * @param subType      type of the data e.g. could be "text/xml" - mime like
 * @param data         the binary data of the file/attachment
 * @throws java.io.IOException// w  w  w  .  java2s  . com
 */
public void PDFAttachGenericFile(PDDocument doc, String filename, String relationship, String description,
        String subType, byte[] data) throws IOException {
    fileAttached = true;

    PDComplexFileSpecification fs = new PDComplexFileSpecification();
    fs.setFile(filename);

    COSDictionary dict = fs.getCOSObject();
    dict.setName("AFRelationship", relationship);
    dict.setString("UF", filename);
    dict.setString("Desc", description);

    ByteArrayInputStream fakeFile = new ByteArrayInputStream(data);
    PDEmbeddedFile ef = new PDEmbeddedFile(doc, fakeFile);
    ef.setSubtype(subType);
    ef.setSize(data.length);
    ef.setCreationDate(new GregorianCalendar());

    ef.setModDate(GregorianCalendar.getInstance());

    fs.setEmbeddedFile(ef);

    // In addition make sure the embedded file is set under /UF
    dict = fs.getCOSObject();
    COSDictionary efDict = (COSDictionary) dict.getDictionaryObject(COSName.EF);
    COSBase lowerLevelFile = efDict.getItem(COSName.F);
    efDict.setItem(COSName.UF, lowerLevelFile);

    // now add the entry to the embedded file tree and set in the document.
    PDDocumentNameDictionary names = new PDDocumentNameDictionary(doc.getDocumentCatalog());
    PDEmbeddedFilesNameTreeNode efTree = names.getEmbeddedFiles();
    if (efTree == null) {
        efTree = new PDEmbeddedFilesNameTreeNode();
    }

    Map<String, PDComplexFileSpecification> namesMap = new HashMap<>();

    Map<String, PDComplexFileSpecification> oldNamesMap = efTree.getNames();
    if (oldNamesMap != null) {
        for (String key : oldNamesMap.keySet()) {
            namesMap.put(key, oldNamesMap.get(key));
        }
    }
    namesMap.put(filename, fs);
    efTree.setNames(namesMap);

    names.setEmbeddedFiles(efTree);
    doc.getDocumentCatalog().setNames(names);

    // AF entry (Array) in catalog with the FileSpec
    COSArray cosArray = (COSArray) doc.getDocumentCatalog().getCOSObject().getItem("AF");
    if (cosArray == null) {
        cosArray = new COSArray();
    }
    cosArray.add(fs);
    COSDictionary dict2 = doc.getDocumentCatalog().getCOSObject();
    COSArray array = new COSArray();
    array.add(fs.getCOSObject()); // see below
    dict2.setItem("AF", array);
    doc.getDocumentCatalog().getCOSObject().setItem("AF", cosArray);
}

From source file:org.xmlcml.font.NonStandardFontManager.java

License:Apache License

private AMIFont lookupOrCreateFont(int level, COSDictionary dict) {
    /**/*from   w w w .j  a v a  2s .  c o  m*/
    Type = COSName{Font}
    Subtype = COSName{Type1}
    BaseFont = COSName{Times-Roman}
    Name = COSName{arXivStAmP}      
    LastChar = COSInt{32}
    Widths = COSArray{[COSInt{19}]}
    FirstChar = COSInt{32}
    FontMatrix = COSArray{[COSFloat{0.0121}, COSInt{0}, COSInt{0}, COSFloat{-0.0121}, COSInt{0}, COSInt{0}]}
    ToUnicode = COSDictionary{(COSName{Length}:COSInt{212}) (COSName{Filter}:COSName{FlateDecode}) }
    FontBBox = COSArray{[COSInt{0}, COSInt{0}, COSInt{1}, COSInt{1}]}
    Resources = COSDictionary{(COSName{ProcSet}:COSArray{[COSName{PDF}, COSName{ImageB}]}) }
    Encoding = COSDictionary{(COSName{Differences}:COSArray{[COSInt{32}, COSName{space}]}) (COSName{Type}:COSName{Encoding}) }
    CharProcs = COSDictionary{(COSName{space}:COSDictionary{(COSName{Length}:COSInt{67}) (COSName{Filter}:COSName{FlateDecode}) }) }*/

    AMIFont amiFont = null;
    String fontName = AMIFont.getFontName(dict);

    String typeS = null;
    amiFont = getAmiFontByFontName(fontName);
    if (amiFont == null) {
        // some confusion here between fontName and fontFamilyName
        amiFont = new AMIFont(fontName, null, typeS, dict);
        amiFont.setFontName(fontName);
        amiFontByFontNameMap.put(fontName, amiFont);

        String indent = "";
        for (int i = 0; i < level; i++) {
            indent += " ";
        }

        LOG.debug(String.format("%s****************** level %d font dict:", indent, level));

        level++;
        indent += "    ";

        for (COSName key : dict.keySet()) {
            String keyName = key.getName();
            Object object = dict.getDictionaryObject(key);
            LOG.debug(String.format("%s****************** %s = %s", indent, keyName, object));
        }

        COSArray array = (COSArray) dict.getDictionaryObject(COSName.DESCENDANT_FONTS);
        if (array != null) {
            LOG.debug(String.format("%s****************** descendant fonts (%d):", indent, array.size()));
            amiFont = lookupOrCreateFont(level, (COSDictionary) array.getObject(0));
        }
    }
    return amiFont;
}

From source file:org.xmlcml.pdf2svg.AMIFont.java

License:Apache License

public static String getFontName(COSDictionary dict) {
    String fontName = null;//from  w  ww  .  j  a v  a  2  s . c om
    String baseFontS = null;
    for (COSName key : dict.keySet()) {
        String keyName = key.getName();
        if (keyName == null) {
            LOG.error("Null key");
            continue;
        } else if (!(key instanceof COSName)) {
            LOG.error("key not COSName");
            continue;
        }
        String cosNameName = null;
        COSBase cosBase = dict.getDictionaryObject(key);
        if (cosBase instanceof COSName) {
            COSName cosName = (COSName) cosBase;
            cosNameName = cosName.getName();
            LOG.trace("Name:" + cosNameName);
        } else if (cosBase instanceof COSInteger) {
            COSInteger cosInteger = (COSInteger) cosBase;
            LOG.trace("Integer: " + cosInteger.intValue());
        } else if (cosBase instanceof COSArray) {
            COSArray cosArray = (COSArray) cosBase;
            LOG.trace("Array: " + cosArray.size() + " / " + cosArray);
        } else if (cosBase instanceof COSDictionary) {
            COSDictionary cosDictionary = (COSDictionary) cosBase;
            LOG.trace("Dictionary: " + cosDictionary);
        } else {
            LOG.error("COS " + cosBase);
        }
        if (cosNameName != null && keyName.equals(N_NAME)) {
            fontName = cosNameName;
        } else if (cosNameName != null && keyName.equals(N_BASE_FONT)) {
            baseFontS = cosNameName;
        }
    }
    if (fontName == null) {
        fontName = baseFontS;
    }
    return fontName;
}

From source file:org.xmlcml.pdf2svg.AMIFont.java

License:Apache License

public static PDFont getFirstDescendantFont(PDFont pdFont) {
    COSDictionary dict = (COSDictionary) pdFont.getCOSObject();
    COSArray array = dict == null ? null : (COSArray) dict.getDictionaryObject(COSName.DESCENDANT_FONTS);
    PDFont descendantFont = null;//ww w  . j  a v  a 2s  .co  m
    try {
        descendantFont = array == null ? null : PDFontFactory.createFont((COSDictionary) array.getObject(0));
    } catch (IOException e) {
        LOG.error("****************** Can't create descendant font! for " + pdFont);
    }
    return descendantFont;
}

From source file:pdfbox.SignatureVerifier.java

License:Apache License

public Map<String, SignatureResult> extractSignatures(File infile) throws IOException, CertificateException,
        NoSuchAlgorithmException, InvalidKeyException, NoSuchProviderException, SignatureException {
    Map<String, SignatureResult> result = new HashMap<>();

    try (PDDocument document = PDDocument.load(infile)) {
        for (PDSignature sig : document.getSignatureDictionaries()) {
            COSDictionary sigDict = sig.getCOSObject();
            COSString contents = (COSString) sigDict.getDictionaryObject(COSName.CONTENTS);

            // download the signed content
            byte[] buf;
            try (FileInputStream fis = new FileInputStream(infile)) {
                buf = sig.getSignedContent(fis);
            }/*  w w  w  . j av a  2  s. c  o  m*/

            System.out.println("Signature found");
            System.out.println("Name:     " + sig.getName());
            System.out.println("Modified: " + sdf.format(sig.getSignDate().getTime()));
            String subFilter = sig.getSubFilter();
            if (subFilter != null) {
                switch (subFilter) {
                case "adbe.pkcs7.detached": // COSName.ADBE_PKCS7_DETACHED
                    result.put(sig.getName(), verifyPKCS7(buf, contents, sig));

                    //TODO check certificate chain, revocation lists, timestamp...
                    break;
                case "adbe.pkcs7.sha1": // COSName.ADBE_PKCS7_SHA1
                {
                    // example: PDFBOX-1452.pdf
                    //COSString certString = (COSString) sigDict.getDictionaryObject(
                    //        COSName.CONTENTS);
                    byte[] certData = contents.getBytes();
                    Collection<? extends Certificate> certs = getCertificates(certData);
                    System.out.println("certs=" + certs);
                    byte[] hash = MessageDigest.getInstance("SHA1").digest(buf);
                    result.put(sig.getName(), verifyPKCS7(hash, contents, sig));

                    //TODO check certificate chain, revocation lists, timestamp...
                    break;
                }
                case "adbe.x509.rsa_sha1": // COSName.ADBE_PKCS7_SHA1
                {
                    // example: PDFBOX-2693.pdf
                    COSString certString = (COSString) sigDict.getDictionaryObject(COSName.getPDFName("Cert"));
                    byte[] certData = certString.getBytes();
                    Collection<? extends Certificate> certs = getCertificates(certData);
                    System.out.println("certs=" + certs);

                    //TODO verify signature
                    throw new IOException(subFilter + " verification not supported");
                    //break;
                }
                default:
                    throw new IOException("Unknown certificate type: " + subFilter);
                    //break;
                }
            } else {
                throw new IOException("Missing subfilter for cert dictionary");
            }
        }
    } catch (CMSException | OperatorCreationException ex) {
        throw new IOException(ex);
    }

    return result;
}