Example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog

List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.PDDocument.getDocumentCatalog().

Prototype

public PDDocumentCatalog getDocumentCatalog() 

Source Link

Document

Returns the document catalog.

Usage

From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java

License:Apache License

/**
 * Fetches a font dictionary from the resources of the first page of a document.
 *
 * @param doc the document whose first page (index 0) is inspected
 * @param internalname key looked up inside the page's {@code COSName.FONT} dictionary
 * @return the entry stored under {@code internalname}, cast to a dictionary
 * @throws IOException declared by the signature
 */
private COSDictionary getFont(PDDocument doc, String internalname) throws IOException {
    PDPage firstPage = (PDPage) doc.getDocumentCatalog().getPages().get(0);
    // The page resources expose their underlying COS object; its FONT entry maps names to fonts.
    COSDictionary fontDictionary = (COSDictionary) firstPage.getResources().getCOSObject()
            .getDictionaryObject(COSName.FONT);
    return (COSDictionary) fontDictionary.getDictionaryObject(internalname);
}

From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java

License:Apache License

/**
 * Converts the first page of the ROTATE resource through {@code PDFBoxAdapter} and checks
 * both the transform left in {@code at} and two operators in the generated stream.
 *
 * @throws Exception if loading, converting or closing the document fails
 */
@Test
public void testStream() throws Exception {
    PDFDocument pdfdoc = new PDFDocument("");
    // "r" here is the member declared outside this method, not the rectangle created below.
    PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
    pdfpage.setDocument(pdfdoc);
    PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), new HashMap<Integer, PDFArray>());
    PDDocument doc = getResource(ROTATE);
    try {
        PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0);
        AffineTransform at = new AffineTransform();
        // Named "rect" so it no longer shadows the outer "r" used for the PDFPage above.
        Rectangle rect = new Rectangle(0, 1650, 842000, 595000);
        String stream = adapter.createStreamFromPDFBoxPage(doc, page, "key", at, null, rect);
        // JUnit convention: expected value first, actual value second.
        Assert.assertEquals(new AffineTransform(-0.0, 1.0000000554888686, 1.0000000554888686, 0.0, 0.0,
                -2.0742416381835938E-5), at);
        Assert.assertTrue(stream.contains("/GS0106079 gs"));
        Assert.assertTrue(stream.contains("/TT0106079 1 Tf"));
    } finally {
        // Close the source document even when an assertion above fails.
        doc.close();
    }
}

From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java

License:Apache License

/**
 * Converts the first page of the HELLOTagged resource with the adapter's current MCID set
 * to 5 and checks that the generated stream contains the matching marked-content operator.
 *
 * @throws IOException if loading, converting or closing the document fails
 */
@Test
public void testTaggedPDFWriter() throws IOException {
    PDFDocument pdfdoc = new PDFDocument("");
    // "r" here is the member declared outside this method, not the rectangle created below.
    PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
    pdfpage.setDocument(pdfdoc);
    PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), new HashMap<Integer, PDFArray>());
    adapter.setCurrentMCID(5);
    PDDocument doc = getResource(HELLOTagged);
    try {
        PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0);
        AffineTransform at = new AffineTransform();
        // Named "rect" so it no longer shadows the outer "r" used for the PDFPage above.
        Rectangle rect = new Rectangle(0, 1650, 842000, 595000);
        String stream = adapter.createStreamFromPDFBoxPage(doc, page, "key", at, null, rect);
        // The stream itself is the failure message so a mismatch shows what was generated.
        Assert.assertTrue(stream, stream.contains("/P <</MCID 5 >>BDC"));
    } finally {
        // Close the source document even when the assertion above fails.
        doc.close();
    }
}

From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java

License:Apache License

/**
 * Converts the first page of the LINK resource and checks the generated stream, the number
 * of entries recorded in the page-number map, and the page's "Annots" entry.
 *
 * @throws Exception if loading, converting or closing the document fails
 */
@Test
public void testLink() throws Exception {
    PDFDocument pdfdoc = new PDFDocument("");
    // "r" here is the member declared outside this method, not the rectangle created below.
    PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
    pdfpage.setDocument(pdfdoc);
    pdfpage.setObjectNumber(1);
    Map<Integer, PDFArray> pageNumbers = new HashMap<Integer, PDFArray>();
    PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), pageNumbers);
    PDDocument doc = getResource(LINK);
    try {
        PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0);
        AffineTransform at = new AffineTransform();
        // Named "rect" so it no longer shadows the outer "r" used for the PDFPage above.
        Rectangle rect = new Rectangle(0, 1650, 842000, 595000);
        String stream = adapter.createStreamFromPDFBoxPage(doc, page, "key", at, null, rect);
        Assert.assertTrue(stream.contains("/Link <</MCID 5 >>BDC"));
        // JUnit convention: expected value first, actual value second.
        Assert.assertEquals(4, pageNumbers.size());
        PDFAnnotList annots = (PDFAnnotList) pdfpage.get("Annots");
        Assert.assertEquals("[\n1 0 R\n2 0 R\n]", annots.toPDFString());
    } finally {
        // Close the source document even when an assertion above fails.
        doc.close();
    }
}

From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java

License:Apache License

/**
 * Converts the first page of the XFORM resource with font merging enabled, writes the
 * resulting FOP document to memory and checks that one specific inline-image header is
 * absent from the output.
 *
 * @throws Exception if loading, converting, closing or writing a document fails
 */
@Test
public void testXform() throws Exception {
    PDFDocument pdfdoc = new PDFDocument("");
    pdfdoc.getFilterMap().put(PDFFilterList.DEFAULT_FILTER, Arrays.asList("null"));
    pdfdoc.setMergeFontsEnabled(true);
    // "r" here is the member declared outside this method, not the rectangle created below.
    PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
    pdfpage.setDocument(pdfdoc);
    pdfpage.setObjectNumber(1);
    Map<Integer, PDFArray> pageNumbers = new HashMap<Integer, PDFArray>();
    PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), pageNumbers);
    PDDocument doc = getResource(XFORM);
    try {
        PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0);
        AffineTransform at = new AffineTransform();
        // Named "rect" so it no longer shadows the outer "r" used for the PDFPage above.
        Rectangle rect = new Rectangle(0, 1650, 842000, 595000);
        adapter.createStreamFromPDFBoxPage(doc, page, "key", at, new FontInfo(), rect);
    } finally {
        // As in the original, the source document is closed before the output is written;
        // the finally block additionally covers a failing conversion.
        doc.close();
    }
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    pdfdoc.output(bos);
    Assert.assertFalse(bos.toString("UTF-8").contains("/W 5 /H 5 /BPC 8 /CS /RGB ID "));
}

From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * This method checks the content of each OutputIntent. The S entry must
 * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile
 * Stream./*  w  w  w.  j  ava 2 s . c om*/
 * 
 * If there are more than one OutputIntent, they have to use the same ICC
 * Profile.
 * 
 * This method returns a list of ValidationError. It is empty if no errors
 * have been found.
 * 
 * @param handler
 * @return
 * @throws ValidationException
 */
public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException {
    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdDocument = handler.getDocument();
    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
    COSDocument cDoc = pdDocument.getDocument();

    COSBase cBase = catalog.getCOSDictionary()
            .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS));
    COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc);

    Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>();

    for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) {
        COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc);

        if (dictionary == null) {

            result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                    "OutputIntent object is null or isn't a dictionary"));

        } else {
            // ---- S entry is mandatory and must be equals to GTS_PDFA1
            String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S));
            if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) {
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID,
                        "The S entry of the OutputIntent isn't GTS_PDFA1"));
                continue;
            }

            // ---- OutputConditionIdentifier is a mandatory field
            String outputConditionIdentifier = dictionary
                    .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER));
            if (outputConditionIdentifier == null) {// empty string is autorized (it may be an application specific value)
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                        "The OutputIntentCondition is missing"));
                continue;
            }

            // ---- If OutputConditionIdentifier is "Custom" or a non Standard ICC Characterization :
            // ---- DestOutputProfile and Info are mandatory
            // ---- DestOutputProfile must be a ICC Profile

            // ---- Because of PDF/A conforming file needs to specify the color characteristics, the DestOutputProfile
            // ---- is checked even if the OutputConditionIdentifier isn't "Custom"
            COSBase dop = dictionary
                    .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE));
            ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler);
            if (valer != null) {
                result.add(valer);
                continue;
            }

            // TODO [LAZY] When Lazy mode will be added, this block should be uncommented to set result as warning.
            //            if (!isStandardICCCharacterization(outputConditionIdentifier)) {
            //               String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO));
            //               if (info == null || "".equals(info)) {
            //                  result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
            //                        "The Info entry of a OutputIntent dictionary is missing"));
            //                  continue;
            //               }
            //            }
        }
    }
    return result;
}

From source file:org.apache.padaf.preflight.RetrieveMissingStream.java

License:Apache License

/**
 * Command-line entry point: loads the PDF named by the single argument, collects the
 * object keys of all COSStream objects, then walks every page of the catalog.
 * <p>
 * NOTE(review): the collected keys and the per-page values are never used and the page
 * loop only prints an empty line, so this looks like an unfinished diagnostic tool.
 *
 * @param args exactly one element, the path of the PDF file to inspect; otherwise a usage
 *             message is printed on stderr and the JVM exits with status 233
 * @throws Exception if the file cannot be opened, parsed or closed
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage : RetrieveMissingStream file");
        System.exit(233);
    }

    HashSet<COSObjectKey> listOfKeys = new HashSet<COSObjectKey>();

    FileInputStream input = new FileInputStream(args[0]);
    try {
        PDDocument document = PDDocument.load(input);
        try {
            List<COSObject> lCosObj = document.getDocument().getObjects();
            for (COSObject cosObject : lCosObj) {
                // Only objects that resolve to a stream are recorded.
                if (cosObject.getObject() instanceof COSStream) {
                    listOfKeys.add(new COSObjectKey(cosObject.getObjectNumber().intValue(),
                            cosObject.getGenerationNumber().intValue()));
                }
            }

            PDDocumentCatalog catalog = document.getDocumentCatalog();
            List<?> pages = catalog.getAllPages();
            for (int i = 0; i < pages.size(); ++i) {
                PDPage pdp = (PDPage) pages.get(i);
                // Both lookups are kept from the original although their results are unused;
                // they are retained in case the getters have effects (e.g. throwing on a broken page).
                PDStream pdStream = pdp.getContents();

                COSBase b = pdp.getCOSDictionary().getItem(COSName.getPDFName("Contents"));
                System.out.println();
            }
        } finally {
            // Release the resources held by the parsed document.
            document.close();
        }
    } finally {
        // Closing is harmless if the loader already closed the stream.
        input.close();
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

/**
 * Processes the embedded-file names of a document: those on the root of the embedded
 * files name tree if present, otherwise those found on each of its direct kids.
 *
 * @param document the PDF whose catalog is searched for embedded files
 * @throws IOException declared by the signature
 * @throws SAXException declared by the signature
 * @throws TikaException declared by the signature
 */
private void extractEmbeddedDocuments(PDDocument document) throws IOException, SAXException, TikaException {
    PDDocumentNameDictionary nameDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog());
    PDEmbeddedFilesNameTreeNode embeddedFilesRoot = nameDictionary.getEmbeddedFiles();
    if (embeddedFilesRoot == null) {
        return;
    }

    // For now, try to get the names out of the root node or its direct kids only.
    // This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java
    // If there is a need we could add a fully recursive search to find a non-null
    // Map<String, COSObjectable> that contains the doc info.
    Map<String, PDComplexFileSpecification> rootNames = embeddedFilesRoot.getNames();
    if (rootNames != null) {
        processEmbeddedDocNames(rootNames);
        return;
    }

    List<PDNameTreeNode<PDComplexFileSpecification>> children = embeddedFilesRoot.getKids();
    if (children == null) {
        return;
    }
    for (PDNameTreeNode<PDComplexFileSpecification> child : children) {
        Map<String, PDComplexFileSpecification> childNames = child.getNames();
        if (childNames == null) {
            continue;
        }
        processEmbeddedDocNames(childNames);
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

/**
 * Opens the XHTML output and then handles the catalog's open action with the
 * {@code DOCUMENT_OPEN} trigger.
 *
 * @param pdf the document being started
 * @throws IOException wrapping any TikaException or SAXException raised while starting
 */
@Override
protected void startDocument(PDDocument pdf) throws IOException {
    try {
        xhtml.startDocument();
        handleDestinationOrAction(
                pdf.getDocumentCatalog().getOpenAction(),
                ActionTrigger.DOCUMENT_OPEN);
    } catch (TikaException | SAXException failure) {
        // The signature only allows IOException, so the original cause is wrapped.
        throw new IOExceptionWithCause("Unable to start a document", failure);
    }
}

From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java

License:Apache License

/**
 * Finishes a document: extracts bookmark text, embedded documents and (when the
 * configuration asks for it) AcroForm content, handles the catalog's additional actions,
 * and closes the XHTML output.
 * <p>
 * IOExceptions from the embedded-document and AcroForm steps are passed to
 * {@code handleCatchableIOE} rather than propagated directly from here.
 *
 * @param pdf the document being finished
 * @throws IOException wrapping any TikaException or SAXException raised while ending,
 *                     or thrown directly by the remaining steps
 */
@Override
protected void endDocument(PDDocument pdf) throws IOException {
    try {
        // Extract text for any bookmarks:
        extractBookmarkText();
        try {
            extractEmbeddedDocuments(pdf);
        } catch (IOException e) {
            handleCatchableIOE(e);
        }

        //extract acroform data at end of doc
        if (config.getExtractAcroFormContent()) {
            try {
                extractAcroForm(pdf);
            } catch (IOException e) {
                handleCatchableIOE(e);
            }
        }
        PDDocumentCatalogAdditionalActions additionalActions = pdf.getDocumentCatalog().getActions();
        handleDestinationOrAction(additionalActions.getDP(), ActionTrigger.AFTER_DOCUMENT_PRINT);
        handleDestinationOrAction(additionalActions.getDS(), ActionTrigger.AFTER_DOCUMENT_SAVE);
        handleDestinationOrAction(additionalActions.getWC(), ActionTrigger.BEFORE_DOCUMENT_CLOSE);
        handleDestinationOrAction(additionalActions.getWP(), ActionTrigger.BEFORE_DOCUMENT_PRINT);
        handleDestinationOrAction(additionalActions.getWS(), ActionTrigger.BEFORE_DOCUMENT_SAVE);
        xhtml.endDocument();
    } catch (TikaException | SAXException e) {
        // Same multi-catch form as startDocument; both causes are wrapped identically.
        throw new IOExceptionWithCause("Unable to end a document", e);
    }
}