Example usage for org.apache.pdfbox.pdmodel.graphics PDXObject getCOSObject

List of usage examples for org.apache.pdfbox.pdmodel.graphics PDXObject getCOSObject

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel.graphics.PDXObject.getCOSObject().

Prototype

@Override
public final COSStream getCOSObject() 

Source Link

Document

Returns the COSStream that backs this XObject.

Usage

From source file:org.apache.fop.render.pdf.pdfbox.PageParentTreeFinder.java

License:Apache License

/**
 * Looks up the first {@code StructParents} entry declared by any XObject in the
 * source page's resources.
 *
 * <p>Every XObject name listed in the page resources is resolved, and the
 * dictionary of its stream is queried for {@code COSName.STRUCT_PARENTS}. The
 * first value that differs from -1 is returned immediately.
 *
 * @return the first {@code StructParents} value other than -1, or -1 when the
 *         page has no resources, no XObjects, or no XObject carrying the entry
 * @throws IOException if an XObject cannot be loaded from the resources
 */
private int findXObjectStructParent() throws IOException {
    // A page without a resources dictionary has no XObjects to inspect.
    if (srcPage.getResources() == null) {
        return -1;
    }
    for (COSName name : srcPage.getResources().getXObjectNames()) {
        PDXObject xObject = srcPage.getResources().getXObject(name);
        if (xObject == null) {
            // The name is listed but does not resolve to an XObject; skip it
            // instead of failing with a NullPointerException.
            continue;
        }
        // getCOSObject() already yields the stream dictionary, so no cast is needed.
        COSDictionary xObjectDict = xObject.getCOSObject();
        int position = xObjectDict.getInt(COSName.STRUCT_PARENTS);
        if (position != -1) {
            return position;
        }
    }
    return -1;
}

From source file:org.apache.tika.parser.pdf.PDF2XHTMLPureJava.java

License:Apache License

/**
 * Scans the XObjects of the given resources and emits every image XObject as an
 * {@code img} element, optionally handing the image bytes to the embedded
 * document extractor. Form XObjects are descended into recursively.
 *
 * @param resources    the resources to scan; the method returns immediately when
 *                     this is {@code null} or inline image extraction is disabled
 * @param seenThisPage streams already visited during this traversal; a stream that
 *                     is already present is skipped so that self-referencing forms
 *                     cannot cause endless recursion (TIKA-1742)
 * @throws SAXException propagated from the XHTML handler or the embedded parser
 * @throws IOException  propagated from the catchable-IOException handler or from
 *                      a recursive call
 */
private void extractImages(PDResources resources, Set<COSBase> seenThisPage) throws SAXException, IOException {
    if (resources == null || !config.getExtractInlineImages()) {
        return;
    }

    for (COSName xObjectName : resources.getXObjectNames()) {

        PDXObject xObject = null;
        try {
            xObject = resources.getXObject(xObjectName);
        } catch (MissingImageReaderException e) {
            EmbeddedDocumentUtil.recordException(e, metadata);
            continue;
        } catch (IOException e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
            continue;
        }
        if (xObject == null) {
            continue;
        }

        COSStream stream = xObject.getCOSObject();
        // Set.add reports false for a stream that was already visited;
        // skipping it avoids infinite recursion (TIKA-1742).
        if (!seenThisPage.add(stream)) {
            continue;
        }

        if (xObject instanceof PDFormXObject) {
            extractImages(((PDFormXObject) xObject).getResources(), seenThisPage);
            continue;
        }
        if (!(xObject instanceof PDImageXObject)) {
            continue;
        }

        PDImageXObject image = (PDImageXObject) xObject;
        Metadata imageMetadata = new Metadata();

        String suffix = image.getSuffix();
        //TODO remove this next block when upgrading to PDFBox 2.0.5.
        //See: https://issues.apache.org/jira/browse/PDFBOX-3634
        if (suffix == null) {
            suffix = getJBIG2Suffix(image);
        }
        // An image without any suffix is treated as PNG.
        if (suffix == null) {
            suffix = "png";
        }

        switch (suffix) {
        case "png":
            imageMetadata.set(Metadata.CONTENT_TYPE, "image/png");
            break;
        case "jpg":
            imageMetadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
            break;
        case "tiff":
            imageMetadata.set(Metadata.CONTENT_TYPE, "image/tiff");
            suffix = "tif";
            break;
        case "jpx":
            imageMetadata.set(Metadata.CONTENT_TYPE, "image/jp2");
            break;
        case "jb2":
            imageMetadata.set(Metadata.CONTENT_TYPE, "image/x-jbig2");
            break;
        default:
            //TODO: determine if we need to add more image types
            break;
        }

        // Reuse the number of an already processed stream, otherwise take the next one.
        Integer number = processedInlineImages.get(stream);
        if (number == null) {
            number = inlineImageCounter++;
        }
        String resourceName = "image" + number + "." + suffix;
        imageMetadata.set(Metadata.RESOURCE_NAME_KEY, resourceName);

        // Output the img tag
        AttributesImpl imgAttributes = new AttributesImpl();
        imgAttributes.addAttribute("", "src", "src", "CDATA", "embedded:" + resourceName);
        imgAttributes.addAttribute("", "alt", "alt", "CDATA", resourceName);
        xhtml.startElement("img", imgAttributes);
        xhtml.endElement("img");

        // When only unique images are wanted, a stream that was processed before
        // gets its img tag above but is not handed to the extractor again.
        if (config.getExtractUniqueInlineImagesOnly()) {
            if (processedInlineImages.containsKey(stream)) {
                continue;
            }
            processedInlineImages.put(stream, number);
        }

        imageMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
                TikaCoreProperties.EmbeddedResourceType.INLINE.toString());

        if (!embeddedDocumentExtractor.shouldParseEmbedded(imageMetadata)) {
            continue;
        }

        ByteArrayOutputStream imageBytes = new ByteArrayOutputStream();
        try {
            //TODO: handle image.getMetadata()?
            try {
                writeToBuffer(image, suffix, imageBytes);
            } catch (IOException e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
                continue;
            }
            try (InputStream imageStream = TikaInputStream.get(imageBytes.toByteArray())) {
                embeddedDocumentExtractor.parseEmbedded(imageStream, new EmbeddedContentHandler(xhtml),
                        imageMetadata, false);
            }
        } catch (IOException e) {
            handleCatchableIOE(e);
        }
    }
}