Example usage for com.itextpdf.text.pdf.PdfDictionary.getAsDict

Introduction

On this page you can find example usages of com.itextpdf.text.pdf.PdfDictionary.getAsDict.

Prototype

public PdfDictionary getAsDict(final PdfName key)

Source Link

Document

Returns a PdfObject as a PdfDictionary, resolving indirect references.

Usage

From source file:org.sejda.impl.itext5.component.PdfUnpacker.java

License:Open Source License

/**
 * Writes out the embedded file referenced by each qualifying dictionary.
 *
 * A dictionary qualifies when its {@code /Type} name is {@code F} or
 * {@code Filespec} and it carries both an {@code /F} string (used as the
 * output name) and an {@code /EF} dictionary. The {@code /F} entry of the
 * {@code /EF} dictionary is resolved and, when it is not null, handed to
 * {@code copyToTemporaryFile}; the resulting file is registered with
 * {@code outputWriter} under the name taken from the {@code /F} string.
 *
 * @param dictionaries the dictionaries to inspect
 * @throws TaskIOException propagated from the helpers called here
 */
private void unpack(Set<PdfDictionary> dictionaries) throws TaskIOException {
    for (PdfDictionary fileSpec : dictionaries) {
        PdfName declaredType = fileSpec.getAsName(PdfName.TYPE);
        boolean isFileSpec = PdfName.F.equals(declaredType) || PdfName.FILESPEC.equals(declaredType);
        if (!isFileSpec) {
            continue;
        }

        PdfDictionary embeddedFiles = fileSpec.getAsDict(PdfName.EF);
        PdfString fileName = fileSpec.getAsString(PdfName.F);
        if (fileName == null || embeddedFiles == null) {
            continue;
        }

        PRStream content = (PRStream) PdfReader.getPdfObject(embeddedFiles.get(PdfName.F));
        if (content == null) {
            continue;
        }

        File extracted = copyToTemporaryFile(content);
        outputWriter.addOutput(file(extracted).name(fileName.toUnicodeString()));
    }
}

From source file:org.sejda.impl.itext5.component.PdfUnpacker.java

License:Open Source License

/**
 * Collects the dictionaries stored in the document's EmbeddedFiles name tree.
 *
 * The catalog's {@code /Names} dictionary and its {@code /EmbeddedFiles}
 * entry are looked up in turn; if either is missing the returned set is
 * empty. Otherwise every value of the name tree is resolved through
 * {@code PdfReader.getPdfObject} and added to the result.
 *
 * @param reader the reader whose catalog is inspected
 * @return the collected dictionaries, never {@code null}
 */
private Set<PdfDictionary> getEmbeddedFilesDictionaries(PdfReader reader) {
    Set<PdfDictionary> found = new NullSafeSet<PdfDictionary>();

    PdfDictionary nameTrees = reader.getCatalog().getAsDict(PdfName.NAMES);
    if (nameTrees == null) {
        return found;
    }

    PdfDictionary embeddedFilesTree = nameTrees.getAsDict(PdfName.EMBEDDEDFILES);
    if (embeddedFilesTree == null) {
        return found;
    }

    HashMap<String, PdfObject> entriesByName = PdfNameTree.readTree(embeddedFilesTree);
    for (PdfObject entry : entriesByName.values()) {
        PdfDictionary fileSpec = (PdfDictionary) PdfReader.getPdfObject(entry);
        found.add(fileSpec);
    }
    return found;
}

From source file:org.sejda.impl.itext5.component.PdfUnpacker.java

License:Open Source License

/**
 * Collects the {@code /FS} dictionaries of all file attachment annotations
 * found on the pages of the document.
 *
 * Pages are visited from 1 to {@code reader.getNumberOfPages()}. For every
 * entry of a page's {@code /Annots} array that resolves to a dictionary
 * with {@code /Subtype} equal to {@code FileAttachment}, the result of
 * {@code getAsDict(PdfName.FS)} is added to the returned set. Entries that
 * resolve to {@code null} or to something other than a dictionary are
 * skipped instead of aborting the scan.
 *
 * @param reader the reader whose pages are scanned
 * @return the collected dictionaries, never {@code null}
 */
private Set<PdfDictionary> getFileAttachmentsDictionaries(PdfReader reader) {
    // NOTE(review): NullSafeSet presumably ignores null additions (an
    // annotation without /FS yields null here) - confirm against its source.
    Set<PdfDictionary> retSet = new NullSafeSet<PdfDictionary>();
    for (int k = 1; k <= reader.getNumberOfPages(); ++k) {
        PdfArray annots = reader.getPageN(k).getAsArray(PdfName.ANNOTS);
        if (annots == null) {
            continue;
        }
        for (PdfObject current : annots) {
            PdfObject resolved = PdfReader.getPdfObject(current);
            if (!(resolved instanceof PdfDictionary)) {
                // Dangling reference (null) or a non-dictionary entry: a
                // blind cast/dereference here would throw and lose the
                // attachments of every remaining annotation and page.
                continue;
            }
            PdfDictionary annot = (PdfDictionary) resolved;
            if (PdfName.FILEATTACHMENT.equals(annot.getAsName(PdfName.SUBTYPE))) {
                retSet.add(annot.getAsDict(PdfName.FS));
            }
        }
    }
    return retSet;
}

From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java

License:Open Source License

/**
 * Processes PDF's fonts dictionary. During the process alternative names
 * of Standard 14 Fonts are changed to the standard ones, provided that
 * the font definition doesn't include Widths array.
 *
 * Font dictionary in PDF file often includes an array of individual glyphs' widths.
 * Widths array is always required except for the Standard 14 Fonts, whose widths
 * are kept by iText itself. Unfortunately, if the font uses alternative name instead of
 * standard one (see PDF Reference 1.7, table H.3), iText doesn't recognize the font as
 * one of the Standard 14 Fonts, and is unable to determine glyphs widths. In such cases
 * this method will change alternative names to standard ones before PDF's parsing process.
 *
 * Font entries that are not indirect references, or whose reference does not
 * resolve to a dictionary, are skipped; the remaining entries are still processed.
 *
 * @param resources the resources dictionary holding the {@code /Font} entry;
 *                  may be {@code null}, in which case nothing is done
 */
private void processAlternativeFontNames(PdfDictionary resources) {
    if (resources == null) {
        return;
    }
    PdfDictionary fontsDictionary = resources.getAsDict(PdfName.FONT);

    if (fontsDictionary == null) {
        return;
    }
    for (PdfName pdfFontName : fontsDictionary.getKeys()) {
        Object fontEntry = fontsDictionary.get(pdfFontName);
        if (!(fontEntry instanceof PRIndirectReference)) {
            // Skip only this entry: returning here would leave every font
            // that happens to be iterated later unprocessed.
            continue;
        }
        // Resolve the reference once and reuse the result.
        Object resolved = PdfReader.getPdfObjectRelease((PRIndirectReference) fontEntry);
        if (!(resolved instanceof PdfDictionary)) {
            continue;
        }
        PdfDictionary fontDictionary = (PdfDictionary) resolved;

        PdfName baseFont = fontDictionary.getAsName(PdfName.BASEFONT);
        if (baseFont != null) {
            String fontName = PdfName.decodeName(baseFont.toString());
            // Only rename when no Widths array is present (see the method description).
            if (fontDictionary.getAsArray(PdfName.WIDTHS) == null
                    && ALT_TO_STANDART_FONTS.containsKey(fontName)) {
                fontDictionary.put(PdfName.BASEFONT, ALT_TO_STANDART_FONTS.get(fontName));
            }
        }
    }
}

From source file:pl.edu.icm.cermine.structure.ITextCharacterExtractor.java

License:Open Source License

/**
 * Replaces array-valued color space entries of the given resources with
 * {@code PdfName.DEVICEGRAY}.
 *
 * Every key of the {@code /ColorSpace} dictionary for which
 * {@code getAsArray} returns a non-null value is overwritten; other entries
 * are left untouched. Nothing happens when {@code resources} is
 * {@code null} or has no {@code /ColorSpace} dictionary.
 *
 * @param resources the resources dictionary to modify in place; may be {@code null}
 */
private void processAlternativeColorSpace(PdfDictionary resources) {
    PdfDictionary colorSpaces = (resources == null) ? null : resources.getAsDict(PdfName.COLORSPACE);
    if (colorSpaces == null) {
        return;
    }
    for (PdfName entryName : colorSpaces.getKeys()) {
        boolean definedByArray = colorSpaces.getAsArray(entryName) != null;
        if (definedByArray) {
            colorSpaces.put(entryName, PdfName.DEVICEGRAY);
        }
    }
}

From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java

License:Apache License

/**
 * Copies information about the PDF held by {@code reader} into {@code metadata}.
 *
 * In order, this method:
 * <ul>
 * <li>copies every entry of {@code reader.getInfo()} under its lower-cased key;</li>
 * <li>records a set of {@code pdf:*} properties read from the reader
 *     (version, page count, encryption flags, ...);</li>
 * <li>if XMP metadata is present, runs the Jempbox extractor on it and, when a
 *     PDF/A schema is found, records {@code pdfaid:part} and
 *     {@code pdfaid:conformance} and overrides {@code pdf:version};</li>
 * <li>if the catalog declares an Adobe extension with both a base version and
 *     an extension level, overrides {@code pdf:version} again;</li>
 * <li>maps the {@code Title} and {@code Author} info entries onto
 *     {@code Metadata.TITLE} and {@code Metadata.AUTHOR}.</li>
 * </ul>
 * Missing optional values (PDF/A part or conformance, Adobe base version or
 * extension level) are skipped rather than dereferenced, so that they do not
 * trigger an exception that would prevent the later steps from running.
 * Any exception that is still raised is reported on {@code System.err} and
 * not propagated.
 *
 * @param reader   the source of the PDF information
 * @param metadata the metadata object that is populated
 */
private static void extractMetadata(PdfReader reader, Metadata metadata) {
    try {
        HashMap<String, String> map = reader.getInfo();
        // Clone the PDF info:
        for (String key : map.keySet()) {
            metadata.set(key.toLowerCase(), map.get(key));
        }
        // Add other data of interest:
        metadata.set("pdf:version", "1." + reader.getPdfVersion());
        metadata.set("pdf:numPages", "" + reader.getNumberOfPages());
        metadata.set("pdf:cryptoMode", "" + getCryptoModeAsString(reader));
        metadata.set("pdf:openedWithFullPermissions", "" + reader.isOpenedWithFullPermissions());
        metadata.set("pdf:encrypted", "" + reader.isEncrypted());
        metadata.set("pdf:metadataEncrypted", "" + reader.isMetadataEncrypted());
        metadata.set("pdf:128key", "" + reader.is128Key());
        metadata.set("pdf:tampered", "" + reader.isTampered());
        // Also grab XMP metadata, if present:
        byte[] xmpmd = reader.getMetadata();
        if (xmpmd != null) {
            // This is standard Tika code for parsing standard stuff from the XMP:
            JempboxExtractor extractor = new JempboxExtractor(metadata);
            extractor.parse(new ByteArrayInputStream(xmpmd));
            // This is custom XMP-handling code:
            XMPMetadata xmp = XMPMetadata.load(new ByteArrayInputStream(xmpmd));
            // There is a special class for grabbing data in the PDF schema - not sure it will add much here:
            // Could parse xmp:CreatorTool and pdf:Producer etc. etc. out of here.
            // Added a PDF/A schema class:
            xmp.addXMLNSMapping(XMPSchemaPDFA.NAMESPACE, XMPSchemaPDFA.class);
            XMPSchemaPDFA pdfaxmp = (XMPSchemaPDFA) xmp.getSchemaByClass(XMPSchemaPDFA.class);
            if (pdfaxmp != null) {
                String part = pdfaxmp.getPart();
                String conformance = pdfaxmp.getConformance();
                metadata.set("pdfaid:part", part);
                metadata.set("pdfaid:conformance", conformance);
                // Only derive the PDF/A version when both components exist;
                // a missing conformance used to throw here and skip the rest
                // of the method via the catch block below.
                if (part != null && conformance != null) {
                    String version = "A-" + part + conformance.toLowerCase();
                    metadata.set("pdf:version", version);
                }
            }
        }
        // Attempt to determine Adobe extension level:
        PdfDictionary extensions = reader.getCatalog().getAsDict(PdfName.EXTENSIONS);
        if (extensions != null) {
            PdfDictionary adobeExt = extensions.getAsDict(PdfName.ADBE);
            if (adobeExt != null) {
                PdfName baseVersion = adobeExt.getAsName(PdfName.BASEVERSION);
                // Both entries are needed to build the version string; skip
                // the override when either one is absent.
                if (baseVersion != null && adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL) != null) {
                    int el = adobeExt.getAsNumber(PdfName.EXTENSIONLEVEL).intValue();
                    // substring(1) drops the first character of the name's string form.
                    metadata.set("pdf:version",
                            baseVersion.toString().substring(1) + " Adobe Extension Level " + el);
                }
            }
        }
        // Ensure the normalised metadata are mapped in:
        if (map.get("Title") != null) {
            metadata.set(Metadata.TITLE, map.get("Title"));
        }
        if (map.get("Author") != null) {
            metadata.set(Metadata.AUTHOR, map.get("Author"));
        }
    } catch (Exception e) {
        // Best-effort extraction: report the problem but do not fail the caller.
        System.err.println("PDFParser.extractMetadata() caught Exception: " + e.getMessage());
        e.printStackTrace();
    }
}