Example usage for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog

Introduction

On this page you can find example usages of the getDocumentCatalog() method of org.apache.pdfbox.pdmodel.PDDocument.

Prototype

public PDDocumentCatalog getDocumentCatalog()

Source Link

Document

This will get the document CATALOG.

Usage

From source file:org.ala.harvester.ExtractPubfSciNamesAndImages.java

License:Apache License

/**
 * Extracts the scientific name and the JPEG images from a fixed range of pages of the
 * document, writes each image to {@code /data/tmp/} and records one
 * {@code name,file} line per image in the "anic" site-map output.
 *
 * <p>The scientific name is the trimmed text found inside a fixed rectangular region of
 * each page. Only images whose suffix is {@code "jpg"} are written.
 *
 * @param document the PDF document to read pages from
 * @throws IOException propagated from text extraction, image writing or the output writer
 */
private static void extractSciNameAndImages(PDDocument document) throws IOException {
    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
    stripper.setSortByPosition(true);
    Rectangle rect = new Rectangle(10, 60, 275, 20);
    stripper.addRegion("class1", rect);
    List allPages = document.getDocumentCatalog().getAllPages();

    Writer writer = getSiteMapWriter("anic");
    // The writer is created here and not handed to anyone else, so it must be closed here;
    // otherwise the underlying resource leaks and buffered output may never be flushed.
    try {
        writeColumnHeaders(writer);

        // Hard-coded page index range of the source document that is processed.
        for (int pageNum = 37; pageNum <= 249; pageNum++) {
            PDPage page = (PDPage) allPages.get(pageNum);
            PDResources resources = page.getResources();
            // A page without its own resources has no images to export.
            Map images = (resources != null) ? resources.getImages() : null;
            stripper.extractRegions(page);

            String sciName = stripper.getTextForRegion("class1").trim();
            System.out.println("Scientific Name: " + sciName);

            if (images != null) {
                Iterator imageIter = images.keySet().iterator();
                while (imageIter.hasNext()) {
                    String key = (String) imageIter.next();
                    PDXObjectImage image = (PDXObjectImage) images.get(key);

                    if ("jpg".equals(image.getSuffix())) {
                        String name = getUniqueFileName(sciName + "_" + key, image.getSuffix());
                        System.out.println("Writing image:" + name);
                        image.write2file("/data/tmp/" + name);

                        writer.write(sciName);
                        writer.write(",");
                        writer.write(name + "." + image.getSuffix());
                        writer.write("\n");
                    }
                }
            }
        }
    } finally {
        writer.close();
    }
}

From source file:org.apache.fop.render.pdf.DocumentRootModifierTestCase.java

License:Apache License

/**
 * Verifies that {@code DocumentRootModifier.structTreeRootEntriesToCopy} copies structure
 * tree root entries from a source dictionary into the target document's structure tree root.
 *
 * <p>Three scenarios are exercised in sequence:
 * <ol>
 *   <li>a RoleMap entry ({@code Icon -> Figure}) is copied into the target root;</li>
 *   <li>when the target already has a RoleMap ({@code MyPara -> P}), both the existing and the
 *       copied entry are present afterwards;</li>
 *   <li>after copying from the structure tree root of a loaded PDF, a pre-existing ClassMap
 *       entry ({@code Normal2}) is still present in the target.</li>
 * </ol>
 *
 * @throws IOException if the sample PDF cannot be loaded or processed
 */
@Test
public void testStructTreeRootEntriesToCopy() throws IOException {
    Rectangle2D r = new Rectangle2D.Double();
    PDFDocument pdfDoc = new PDFDocument("");
    PDFPage page = new PDFPage(new PDFResources(pdfDoc), 0, r, r, r, r);
    page.setObjectNumber(1);
    page.setDocument(pdfDoc);
    pdfDoc.makeStructTreeRoot(null);
    PDFStructTreeRoot structTreeRoot = pdfDoc.getRoot().getStructTreeRoot();
    PDFDictionary rootBaseRoleMap = new PDFDictionary();
    PDFBoxAdapter adapter = new PDFBoxAdapter(page, new HashMap(), new HashMap<Integer, PDFArray>());
    DocumentRootModifier modifier = new DocumentRootModifier(adapter, pdfDoc);

    // Scenario 1: copy a RoleMap into a target that has none.
    COSDictionary root = new COSDictionary();
    COSDictionary mapRole = new COSDictionary();
    mapRole.setName("Icon", "Figure");
    root.setItem(COSName.ROLE_MAP, mapRole);
    modifier.structTreeRootEntriesToCopy(root);
    structTreeRoot = pdfDoc.getRoot().getStructTreeRoot();
    PDFDictionary baseRoot = (PDFDictionary) structTreeRoot.get("RoleMap");
    String test = baseRoot.get("Icon").toString();
    String expected = "/Figure";
    // JUnit's contract is assertEquals(expected, actual); keeps failure messages truthful.
    Assert.assertEquals(expected, test);

    // Scenario 2: the target already carries a RoleMap of its own.
    PDFName para = new PDFName("P");
    rootBaseRoleMap.put("MyPara", para);
    structTreeRoot.put("RoleMap", rootBaseRoleMap);
    modifier.structTreeRootEntriesToCopy(root);
    structTreeRoot = pdfDoc.getRoot().getStructTreeRoot();
    PDFDictionary baseRoot2 = (PDFDictionary) structTreeRoot.get("RoleMap");
    PDFName nameIcon = (PDFName) baseRoot2.get("Icon");
    PDFName myPara = (PDFName) baseRoot2.get("MyPara");
    test = nameIcon.getName();
    expected = "Figure";
    Assert.assertEquals(expected, test);
    test = myPara.getName();
    expected = "P";
    Assert.assertEquals(expected, test);

    // Scenario 3: copy from the structure tree root of a real PDF.
    PDDocument doc = PDDocument.load(new File(getClass().getResource(CLASSMAP).getFile()));
    try {
        COSDictionary temp = (COSDictionary) doc.getDocumentCatalog().getStructureTreeRoot().getCOSObject();
        PDFDictionary classMap = new PDFDictionary();
        PDFDictionary inner = new PDFDictionary();
        inner.put("StartIndent", 0);
        classMap.put("Normal2", inner);
        structTreeRoot.put("ClassMap", classMap);
        modifier.structTreeRootEntriesToCopy(temp);
        structTreeRoot = pdfDoc.getRoot().getStructTreeRoot();
        PDFDictionary testDict = (PDFDictionary) structTreeRoot.get("ClassMap");
        Assert.assertNotNull(testDict.get("Normal2"));
    } finally {
        // Release the loaded document even when an assertion fails.
        doc.close();
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.AbstractPDFBoxHandler.java

License:Apache License

/**
 * Creates the content stream for one page of a source PDF so that it can be embedded in the
 * given target page.
 *
 * <p>The page to embed is selected by the page index encoded in the image's original URI.
 * If the source document declares a higher PDF version than the target document, the target
 * version is raised; when that is not possible a version-mismatch event is emitted instead.
 *
 * @param image the PDF image providing the source document and its original URI
 * @param targetPage the page of the output document that receives the content
 * @param userAgent the user agent; may be {@code null}, in which case no event broadcaster is
 *        obtained from it and accessibility handling is skipped
 * @param at the transformation passed on to the adapter
 * @param fontinfo the font information passed on to the adapter
 * @param pos the position rectangle passed on to the adapter
 * @param pageNumbers map passed on to the {@code PDFBoxAdapter}
 * @param handler the logical structure handler; may be {@code null}
 * @param curentSessionElem the structure element handed to the tagged-PDF conductor
 * @return the stream produced by the adapter, or {@code null} if the source PDF is encrypted
 * @throws IOException if processing the source page fails
 */
protected String createStreamForPDF(ImagePDF image, PDFPage targetPage, FOUserAgent userAgent,
        AffineTransform at, FontInfo fontinfo, Rectangle pos, Map<Integer, PDFArray> pageNumbers,
        PDFLogicalStructureHandler handler, PDFStructElem curentSessionElem) throws IOException {

    EventBroadcaster eventBroadcaster = null;
    if (userAgent != null) {
        eventBroadcaster = userAgent.getEventBroadcaster();
    }
    String originalImageUri = image.getInfo().getOriginalURI();
    final int selectedPage = ImageUtil.needPageIndexFromURI(originalImageUri);

    PDDocument pddoc = image.getPDDocument();
    float pdfVersion = pddoc.getDocument().getVersion();
    Version inputDocVersion = Version.getValueOf(String.valueOf(pdfVersion));
    PDFDocument pdfDoc = targetPage.getDocument();

    // Raise the output version if the embedded document needs a newer one.
    if (pdfDoc.getPDFVersion().compareTo(inputDocVersion) < 0) {
        try {
            pdfDoc.setPDFVersion(inputDocVersion);
        } catch (IllegalStateException e) {
            // The version could not be changed; report the mismatch instead of failing.
            getEventProducer(eventBroadcaster).pdfVersionMismatch(this, pdfDoc.getPDFVersionString(),
                    String.valueOf(pdfVersion));
        }
    }

    //Encryption test
    if (pddoc.isEncrypted()) {
        getEventProducer(eventBroadcaster).encryptedPdf(this);
        return null;
    }

    //Warn about potential problems with PDF/A and PDF/X
    if (pdfDoc.getProfile().isPDFAActive()) {
        getEventProducer(eventBroadcaster).pdfAActive(this);
    }
    if (pdfDoc.getProfile().isPDFXActive()) {
        getEventProducer(eventBroadcaster).pdfXActive(this);
    }

    Map<Object, Object> objectCache = getObjectCache(originalImageUri, userAgent);

    PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);

    // Give the target page its own resources object (chained to the document resources)
    // if it does not have a parent resources object yet.
    if (targetPage.getPDFResources().getParentResources() == null) {
        PDFResources res = pdfDoc.getFactory().makeResources();
        res.setParentResources(pdfDoc.getResources());
        res.addContext(targetPage);
        targetPage.put("Resources", res);
    }

    PDFBoxAdapter adapter = new PDFBoxAdapter(targetPage, objectCache, pageNumbers);
    if (handler != null) {
        adapter.setCurrentMCID(handler.getPageParentTree().length());
    }
    String stream = adapter.createStreamFromPDFBoxPage(pddoc, page, originalImageUri, at, fontinfo, pos);
    // userAgent is treated as optional at the top of this method, so guard it here as well
    // instead of failing with a NullPointerException after the stream has been created.
    if (userAgent != null && userAgent.isAccessibilityEnabled()) {
        TaggedPDFConductor conductor = new TaggedPDFConductor(curentSessionElem, handler, page, adapter);
        conductor.handleLogicalStructure(pddoc);
    }
    return stream;
}

From source file:org.apache.fop.render.pdf.pdfbox.PageParentTreeFinder.java

License:Apache License

/**
 * Looks up the parent-tree array that belongs to the source page.
 *
 * <p>The lookup key is the page's {@code StructParents} value; when the page has none
 * ({@code -1}), the value found by {@code findXObjectStructParent()} is used instead.
 *
 * @param srcDoc the source document whose structure tree root holds the parent tree
 * @return the array found for the key, or an empty array when no key is available
 * @throws IOException if traversing the parent tree fails
 */
public COSArray getPageParentTreeArray(PDDocument srcDoc) throws IOException {
    int structParentsKey = srcPage.getCOSObject().getInt(COSName.STRUCT_PARENTS);
    if (structParentsKey == -1) {
        structParentsKey = findXObjectStructParent();
    }
    if (structParentsKey == -1) {
        // Neither the page nor its XObjects carry a key: nothing to look up.
        return new COSArray();
    }
    PDNumberTreeNode parentTree = srcDoc.getDocumentCatalog().getStructureTreeRoot().getParentTree();
    return traverseParentTree(parentTree.getCOSObject(), structParentsKey);
}

From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java

License:Apache License

/**
 * Carries the annotations of a source page, and the form fields that belong to them, over
 * into the target document.
 *
 * <p>Nothing is done when the source document has no AcroForm and the page has no
 * annotations. Otherwise the annotations are moved using the given transform, the kids of
 * the page's parent node are registered in the clone cache, the annotations are copied, and
 * the resulting fields are cloned into the target catalog's AcroForm {@code Fields} array
 * (creating the AcroForm and the array when they do not exist yet).
 *
 * @param sourceDoc the source document providing the catalog and AcroForm
 * @param page the source page whose annotations are handled
 * @param at the transform passed to {@code moveAnnotations}
 * @throws IOException if the page is not listed among the kids of its parent page node, or
 *         if a called helper fails
 */
private void handleAnnotations(PDDocument sourceDoc, PDPage page, AffineTransform at) throws IOException {
    PDDocumentCatalog srcCatalog = sourceDoc.getDocumentCatalog();
    PDAcroForm srcAcroForm = srcCatalog.getAcroForm();
    List pageAnnotations = page.getAnnotations();
    if (srcAcroForm == null && pageAnnotations.isEmpty()) {
        // No form and no annotations: nothing to carry over.
        return;
    }

    moveAnnotations(page, pageAnnotations, at);

    //Pseudo-cache the target page in place of the original source page.
    //This essentially replaces the original page reference with the target page.
    COSObject cosPage = null;
    COSDictionary parentDic = (COSDictionary) page.getCOSObject().getDictionaryObject(COSName.PARENT,
            COSName.P);
    COSArray kids = (COSArray) parentDic.getDictionaryObject(COSName.KIDS);
    for (int i = 0; i < kids.size(); i++) {
        //Hopefully safe to cast, as kids need to be indirect objects
        COSObject kid = (COSObject) kids.get(i);
        if (!pageNumbers.containsKey(i)) {
            // First time this kid index is seen: create a one-element array (holding null),
            // give it an object number and register it as a trailer object, then remember
            // it under the kid's index.
            PDFArray a = new PDFArray();
            a.add(null);
            pdfDoc.assignObjectNumber(a);
            pdfDoc.addTrailerObject(a);
            pageNumbers.put(i, a);
        }
        // Map the source kid to the array stored for its index in the clone cache.
        cacheClonedObject(kid, pageNumbers.get(i));
        if (kid.getObject() == page.getCOSObject()) {
            cosPage = kid;
        }
    }
    // cosPage is only used to verify that the page was found among its parent's kids.
    if (cosPage == null) {
        throw new IOException("Illegal PDF. Page not part of parent page node.");
    }

    Set<COSObject> fields = copyAnnotations(page);

    boolean formAlreadyCopied = getCachedClone(srcAcroForm) != null;
    PDFRoot catalog = this.pdfDoc.getRoot();
    PDFDictionary destAcroForm = (PDFDictionary) catalog.get(COSName.ACRO_FORM.getName());
    if (formAlreadyCopied) {
        //skip, already copied
    } else if (destAcroForm == null) {
        if (srcAcroForm != null) {
            //With this, only the first PDF's AcroForm is copied over. If later AcroForms have
            //different properties besides the actual fields, these get lost. Only fields
            //get merged.
            Collection exclude = Collections.singletonList(COSName.FIELDS);
            destAcroForm = (PDFDictionary) cloneForNewDocument(srcAcroForm, srcAcroForm, exclude);
        } else {
            //Work-around for incorrectly split PDFs which lack an AcroForm but have widgets
            //on pages. This doesn't handle the case where field dicts have "C" entries
            //(for the "CO" entry), so this may produce problems, but we have almost no chance
            //to guess the calculation order.
            destAcroForm = new PDFDictionary(pdfDoc.getRoot());
        }
        pdfDoc.registerObject(destAcroForm);
        catalog.put(COSName.ACRO_FORM.getName(), destAcroForm);
    }
    // NOTE(review): if formAlreadyCopied is true while the target catalog has no AcroForm
    // entry, destAcroForm is still null here and the next line would throw - confirm that
    // this combination cannot occur.
    PDFArray clonedFields = (PDFArray) destAcroForm.get(COSName.FIELDS.getName());
    if (clonedFields == null) {
        clonedFields = new PDFArray();
        destAcroForm.put(COSName.FIELDS.getName(), clonedFields);
    }
    // Clone each collected field (leaving out its KIDS entry) and append it to the
    // target form's field list.
    for (COSObject field : fields) {
        PDFDictionary clone = (PDFDictionary) cloneForNewDocument(field, field, Arrays.asList(COSName.KIDS));
        clonedFields.add(clone);
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.PreloaderPDF.java

License:Apache License

/**
 * Builds the {@code ImageInfo} for one page of a PDF document.
 *
 * <p>The page index is taken from the URI. The image size is derived from the page's crop
 * box (or its media box when there is no crop box), with width and height exchanged for
 * pages rotated by 90 or 270 degrees. The loaded document is attached to the returned info
 * as the original image, and a "has more images" marker is set when the selected page is
 * not the last one.
 *
 * @param uri the image URI, which also encodes the selected page index
 * @param src the source used to obtain the document
 * @param context the image context supplying the source resolution
 * @return the populated image info
 * @throws IOException if the document cannot be loaded
 * @throws ImageException if the selected page does not exist in the document
 */
private ImageInfo loadPDF(String uri, Source src, ImageContext context) throws IOException, ImageException {
    final int pageIndex = ImageUtil.needPageIndexFromURI(uri);
    final URI documentUri = deriveDocumentURI(src.getSystemId());

    PDDocument loaded = getDocument(context, documentUri, src);
    PDDocument pddoc = Interceptors.getInstance().interceptOnLoad(loaded, documentUri);

    //The cached PDF is left for the GC to dispose of, so the warning about a missing
    //close is switched off.
    pddoc.getDocument().setWarnMissingClose(false);

    int pageCount = pddoc.getNumberOfPages();
    boolean pageExists = pageIndex >= 0 && pageIndex < pageCount;
    if (!pageExists) {
        throw new ImageException("Selected page (index: " + pageIndex
                + ") does not exist in the PDF file. The document has " + pddoc.getNumberOfPages() + " pages.");
    }

    PDPage page = pddoc.getDocumentCatalog().getPages().get(pageIndex);
    PDRectangle mediaBox = page.getMediaBox();
    PDRectangle cropBox = page.getCropBox();
    PDRectangle viewBox;
    if (cropBox == null) {
        viewBox = mediaBox;
    } else {
        viewBox = cropBox;
    }
    int widthMpt = Math.round(viewBox.getWidth() * 1000);
    int heightMpt = Math.round(viewBox.getHeight() * 1000);

    //A page rotated by a quarter turn is presented with width and height exchanged
    int rotation = PDFUtil.getNormalizedRotation(page);
    boolean quarterTurn = rotation == 90 || rotation == 270;

    ImageSize size = new ImageSize();
    if (quarterTurn) {
        size.setSizeInMillipoints(heightMpt, widthMpt);
    } else {
        size.setSizeInMillipoints(widthMpt, heightMpt);
    }
    size.setResolution(context.getSourceResolution());
    size.calcPixelsFromSize();

    ImageInfo info = new ImageInfo(uri, ImagePDF.MIME_PDF);
    info.setSize(size);
    info.getCustomObjects().put(ImageInfo.ORIGINAL_IMAGE, new ImagePDF(info, pddoc));

    //Signal that further pages follow the selected one
    boolean morePagesFollow = pageIndex < pddoc.getNumberOfPages() - 1;
    if (morePagesFollow) {
        info.getCustomObjects().put(ImageInfo.HAS_MORE_IMAGES, Boolean.TRUE);
    }

    return info;
}

From source file:org.apache.fop.render.pdf.pdfbox.TaggedPDFConductor.java

License:Apache License

/**
 * Transfers the logical structure of a tagged source PDF to the target.
 *
 * <p>The structure is only processed when the source is marked as tagged and has a structure
 * tree root. If the structure root has a parent tree, the marked-content parents of the
 * source page are copied (applying the source role map when present); otherwise the direct
 * descendants of the structure root are created. The current session element is configured
 * in every case.
 *
 * @param srcDoc the source document
 * @throws IOException if reading or copying the structure fails
 */
public void handleLogicalStructure(PDDocument srcDoc) throws IOException {
    boolean hasUsableStructure = isInputPDFTagged(srcDoc) && isStructureTreeRootNull(srcDoc);
    if (hasUsableStructure) {
        merger.setCurrentSessionElem();
        COSDictionary structRoot = srcDoc.getDocumentCatalog().getStructureTreeRoot().getCOSObject();
        rootMod.structTreeRootEntriesToCopy(structRoot);
        if (isParentTreeIsPresent(structRoot)) {
            PageParentTreeFinder parentFinder = new PageParentTreeFinder(srcPage);
            COSArray pageParents = parentFinder.getPageParentTreeArray(srcDoc);
            COSDictionary srcRoleMap = (COSDictionary) structRoot.getDictionaryObject(COSName.ROLE_MAP);
            if (srcRoleMap != null) {
                merger.setRoleMap(srcRoleMap);
            }
            merger.copyStructure(pageParents);
        } else {
            // No parent tree to consult: build directly from the structure root.
            merger.createDirectDescendants(structRoot, merger.currentSessionElem);
        }
    }
    configureCurrentSessionElem(srcDoc);
}

From source file:org.apache.fop.render.pdf.pdfbox.TaggedPDFConductor.java

License:Apache License

/**
 * Tells whether the source document declares itself as tagged.
 *
 * @param srcDoc the source document
 * @return {@code true} if the catalog has mark info and that mark info is marked
 */
private boolean isInputPDFTagged(PDDocument srcDoc) {
    PDMarkInfo markInfo = srcDoc.getDocumentCatalog().getMarkInfo();
    if (markInfo == null) {
        return false;
    }
    return markInfo.isMarked();
}

From source file:org.apache.fop.render.pdf.pdfbox.TaggedPDFConductor.java

License:Apache License

/**
 * Tells whether the source document has a structure tree root.
 *
 * <p>Note that, despite the method name, the result is {@code true} when the structure tree
 * root is <em>present</em> (not null).
 *
 * @param srcDoc the source document
 * @return {@code true} if the catalog's structure tree root is not {@code null}
 */
private boolean isStructureTreeRootNull(PDDocument srcDoc) {
    boolean rootPresent = srcDoc.getDocumentCatalog().getStructureTreeRoot() != null;
    return rootPresent;
}

From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java

License:Apache License

/**
 * Loads the given PDF resource and returns the stream created from its first page.
 *
 * @param fi the font info passed to the adapter
 * @param pdf the name of the PDF resource; also passed to the adapter as the key
 * @return the stream produced by {@code createStreamFromPDFBoxPage} for the first page
 * @throws IOException if loading the document or creating the stream fails
 */
private String writeText(FontInfo fi, String pdf) throws IOException {
    PDDocument doc = getResource(pdf);
    try {
        PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0);
        AffineTransform at = new AffineTransform();
        return getPDFBoxAdapter().createStreamFromPDFBoxPage(doc, page, pdf, at, fi, new Rectangle());
    } finally {
        // Close the document even when stream creation throws.
        doc.close();
    }
}