Example usage for org.apache.pdfbox.pdmodel.graphics.image PDImageXObject getSuffix

List of usage examples for org.apache.pdfbox.pdmodel.graphics.image PDImageXObject getSuffix

Introduction

On this page you can find example usage for the getSuffix method of org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject.

Prototype

@Override
public String getSuffix() 

Source Link

Document

This will get the suffix for this image type, e.g. jpg/png.

Usage

From source file:at.gv.egiz.pdfas.lib.impl.pdfbox2.placeholder.SignaturePlaceholderExtractor.java

License:EUPL

/**
 * Checks whether an image is a QR-code placeholder for a signature.
 *
 * <p>The image is decoded with the QR-code format as the only candidate. Decoded text that
 * starts with {@code QR_PLACEHOLDER_IDENTIFIER} is split on {@code ';'}; every segment after
 * the first is expected to be a single {@code key=value} pair whose key is matched
 * case-insensitively against {@code ID_KEY}, {@code PROFILE_KEY}, {@code SIG_KEY_KEY} and
 * {@code TYPE_KEY} of {@link SignaturePlaceholderData}. Malformed or unknown pairs are ignored.
 *
 * @param image the image XObject to inspect
 * @return the parsed placeholder data, or {@code null} when the image cannot be rendered, is
 *         smaller than 10 pixels in either dimension, contains no decodable QR code, or the
 *         decoded text does not start with the placeholder identifier
 * @throws IOException if the image data cannot be read
 */
private SignaturePlaceholderData checkImage(PDImageXObject image) throws IOException {
    BufferedImage bimg = image.getImage();
    if (bimg == null) {
        String type = image.getSuffix();
        if (type != null) {
            // Locale.ROOT keeps the label stable; the default locale could change the casing
            // (e.g. "tiff" becomes "TİFF" under a Turkish locale).
            type = type.toUpperCase(java.util.Locale.ROOT) + " images";
        } else {
            type = "Image type";
        }
        logger.info("Unable to extract image for QRCode analysis. " + type
                + " not supported. Add additional JAI Image filters to your classpath. Refer to https://jai.dev.java.net. Skipping image.");
        return null;
    }
    if (bimg.getHeight() < 10 || bimg.getWidth() < 10) {
        logger.debug("Image too small for QRCode. Skipping image.");
        return null;
    }

    LuminanceSource source = new BufferedImageLuminanceSource(bimg);
    BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));
    Result result;
    // Start of the decode attempt; only used for the debug timing message below.
    long before = System.currentTimeMillis();
    try {
        // Restrict the reader to QR codes so other barcode formats are not tried.
        Hashtable<DecodeHintType, Object> hints = new Hashtable<DecodeHintType, Object>();
        Vector<BarcodeFormat> formats = new Vector<BarcodeFormat>();
        formats.add(BarcodeFormat.QR_CODE);
        hints.put(DecodeHintType.POSSIBLE_FORMATS, formats);
        result = new MultiFormatReader().decode(bitmap, hints);

        String text = result.getText();
        String profile = null;
        String type = null;
        String sigKey = null;
        String id = null;
        if (text != null) {
            if (text.startsWith(QR_PLACEHOLDER_IDENTIFIER)) {
                String[] data = text.split(";");
                // Segment 0 holds the identifier itself; parameters start at index 1.
                for (int i = 1; i < data.length; i++) {
                    String kvPair = data[i];
                    String[] kv = kvPair.split("=");
                    if (kv.length != 2) {
                        logger.debug("Invalid parameter in placeholder data: " + kvPair);
                    } else if (kv[0].equalsIgnoreCase(SignaturePlaceholderData.ID_KEY)) {
                        id = kv[1];
                    } else if (kv[0].equalsIgnoreCase(SignaturePlaceholderData.PROFILE_KEY)) {
                        profile = kv[1];
                    } else if (kv[0].equalsIgnoreCase(SignaturePlaceholderData.SIG_KEY_KEY)) {
                        sigKey = kv[1];
                    } else if (kv[0].equalsIgnoreCase(SignaturePlaceholderData.TYPE_KEY)) {
                        type = kv[1];
                    }
                }
                return new SignaturePlaceholderData(profile, type, sigKey, id);
            } else {
                logger.warn("QR-Code found but does not start with \"" + QR_PLACEHOLDER_IDENTIFIER
                        + "\". Ignoring QR placeholder.");
            }
        }
    } catch (ReaderException re) {
        if (logger.isDebugEnabled()) {
            logger.debug(
                    "Could not decode - not a placeholder. needed: " + (System.currentTimeMillis() - before));
        }
        // NotFoundException just means "no QR code here"; anything else is worth reporting.
        if (!(re instanceof NotFoundException)) {
            if (logger.isInfoEnabled()) {
                logger.info("Failed to decode image", re);
            }
        }
    } catch (ArrayIndexOutOfBoundsException e) {
        if (logger.isInfoEnabled()) {
            logger.info("Failed to decode image. Probably a zxing bug", e);
        }
    }
    return null;
}

From source file:com.fngry.monk.biz.demo.pdf.pdfbox.PrintImageLocations.java

License:Apache License

/**
 * Handles a content-stream operator, recording the placement of every non-PNG image drawn
 * with the {@code Do} operator.
 *
 * <p>For a {@code Do} operator whose XObject is an image, an {@code ImageInfo} is built from
 * the translation and scaling factors of the current transformation matrix (each truncated to
 * {@code int}) and appended to {@code imageInfoList}. Images whose suffix is {@code "png"} are
 * skipped. Form XObjects are passed to {@code showForm}; every other operator is delegated to
 * the superclass.
 *
 * @param operator The operation to perform.
 * @param operands The list of arguments.
 *
 * @throws IOException If there is an error processing the operation.
 */
@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
    if (!"Do".equals(operator.getName())) {
        super.processOperator(operator, operands);
        return;
    }

    COSName objectName = (COSName) operands.get(0);
    PDXObject xobject = getResources().getXObject(objectName);
    if (xobject instanceof PDImageXObject) {
        PDImageXObject image = (PDImageXObject) xobject;

        if ("png".equals(image.getSuffix())) {
            return;
        }

        // Translation gives the position and the scaling factors give the displayed size,
        // both in user space units.
        Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
        float imageXScale = ctmNew.getScalingFactorX();
        float imageYScale = ctmNew.getScalingFactorY();

        ImageInfo imageInfo = new ImageInfo((int) ctmNew.getTranslateX(), (int) ctmNew.getTranslateY(),
                (int) imageXScale, (int) imageYScale);
        imageInfo.setImage(image);
        imageInfo.setObjectName(objectName);

        imageInfoList.add(imageInfo);
    } else if (xobject instanceof PDFormXObject) {
        PDFormXObject form = (PDFormXObject) xobject;
        showForm(form);
    }
}

From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java

License:Apache License

/**
 * Emits an {@code img} element for every image XObject found in the given resources and hands
 * the image's filtered stream to the embedded document extractor.
 *
 * <p>Form XObjects are searched recursively through their own resources. When unique-only
 * extraction is configured, an image whose XObject name was already processed still gets its
 * {@code img} element but is not parsed again.
 *
 * @param resources the resources to scan; {@code null} is a no-op
 * @throws SAXException if writing to the content handler fails
 * @throws IOException if an XObject cannot be read from the resources
 */
private void extractImages(PDResources resources) throws SAXException, IOException {
    if (resources == null || config.getExtractInlineImages() == false) {
        return;
    }

    Iterable<COSName> cosIterable = resources.getXObjectNames();
    if (cosIterable == null) {
        return;
    }

    for (COSName name : cosIterable) {
        PDXObject object = resources.getXObject(name);
        if (object instanceof PDFormXObject) {
            extractImages(((PDFormXObject) object).getResources());
        } else if (object instanceof PDImageXObject) {
            PDImageXObject image = (PDImageXObject) object;

            Metadata metadata = new Metadata();
            String extension = "";

            // Unrecognised suffixes leave both the content type and the extension unset.
            String suffix = image.getSuffix();
            if ("jpg".equalsIgnoreCase(suffix)) {
                metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
                extension = ".jpg";
            } else if ("tiff".equalsIgnoreCase(suffix)) {
                metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
                extension = ".tif";
            } else if ("jpx".equalsIgnoreCase(suffix)) {
                metadata.set(Metadata.CONTENT_TYPE, "image/jpx");
                extension = ".jpx";
            } else if ("png".equalsIgnoreCase(suffix)) {
                metadata.set(Metadata.CONTENT_TYPE, "image/png");
                extension = ".png";
            }

            // Reuse the number of an already-processed image so its file name stays stable.
            Integer imageNumber = processedInlineImages.get(name.getName());
            if (imageNumber == null) {
                imageNumber = inlineImageCounter++;
            }
            String fileName = "image" + imageNumber + extension;
            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);

            // Output the img tag
            AttributesImpl attr = new AttributesImpl();
            attr.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName);
            attr.addAttribute("", "alt", "alt", "CDATA", fileName);
            handler.startElement("img", attr);
            handler.endElement("img");

            //Do we only want to process unique COSObject ids?
            //If so, have we already processed this one?
            if (config.getExtractUniqueInlineImagesOnly() == true) {
                String cosObjectId = name.getName();
                if (processedInlineImages.containsKey(cosObjectId)) {
                    continue;
                }
                processedInlineImages.put(cosObjectId, imageNumber);
            }

            metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
                    TikaCoreProperties.EmbeddedResourceType.INLINE.toString());

            EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor();
            if (extractor.shouldParseEmbedded(metadata)) {
                // The stream is opened here, so it is closed here once parsing has finished.
                try (java.io.InputStream imageStream = image.getCOSStream().getFilteredStream()) {
                    extractor.parseEmbedded(imageStream, new EmbeddedContentHandler(handler), metadata, false);
                } catch (IOException ignored) {
                    // could not extract this image, so just skip it...
                }
            }
        }
    }
}

From source file:org.apache.tika.parser.pdf.PDF2XHTMLPureJava.java

License:Apache License

/**
 * Walks the XObjects of the given resources, emitting an {@code img} element for each image
 * and passing the image bytes to the embedded document extractor.
 *
 * <p>Form XObjects are descended into recursively. Every COS stream is visited at most once
 * per {@code seenThisPage} set. XObjects that cannot be loaded are recorded on the parent
 * metadata and skipped.
 *
 * @param resources the resources to scan; {@code null} is a no-op
 * @param seenThisPage COS objects already visited; updated as XObjects are encountered
 * @throws SAXException if writing to the XHTML handler or parsing the embedded image fails
 * @throws IOException if {@code handleCatchableIOE} rethrows a failure from embedded parsing
 */
private void extractImages(PDResources resources, Set<COSBase> seenThisPage) throws SAXException, IOException {
    if (resources == null || !config.getExtractInlineImages()) {
        return;
    }

    for (COSName xObjectName : resources.getXObjectNames()) {
        PDXObject xObject = null;
        try {
            xObject = resources.getXObject(xObjectName);
        } catch (MissingImageReaderException e) {
            EmbeddedDocumentUtil.recordException(e, metadata);
            continue;
        } catch (IOException e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
            continue;
        }
        if (xObject == null) {
            continue;
        }

        COSStream stream = xObject.getCOSObject();
        //avoid infinite recursion TIKA-1742: add() returns false for a stream already visited
        if (!seenThisPage.add(stream)) {
            continue;
        }

        if (xObject instanceof PDFormXObject) {
            extractImages(((PDFormXObject) xObject).getResources(), seenThisPage);
            continue;
        }
        if (!(xObject instanceof PDImageXObject)) {
            continue;
        }

        PDImageXObject image = (PDImageXObject) xObject;
        Metadata embeddedMetadata = new Metadata();

        String suffix = image.getSuffix();
        //TODO remove this next block when upgrading to PDFBox 2.0.5.
        //See: https://issues.apache.org/jira/browse/PDFBOX-3634
        if (suffix == null) {
            suffix = getJBIG2Suffix(image);
        }
        // An image with no known suffix is treated as PNG.
        if (suffix == null) {
            suffix = "png";
        }
        switch (suffix) {
            case "png":
                embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/png");
                break;
            case "jpg":
                embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
                break;
            case "tiff":
                embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/tiff");
                suffix = "tif";
                break;
            case "jpx":
                embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/jp2");
                break;
            case "jb2":
                embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/x-jbig2");
                break;
            default:
                //TODO: determine if we need to add more image types
                break;
        }

        // Reuse the number of an already-processed stream so its file name stays stable.
        Integer imageNumber = processedInlineImages.get(stream);
        if (imageNumber == null) {
            imageNumber = inlineImageCounter++;
        }
        String fileName = "image" + imageNumber + "." + suffix;
        embeddedMetadata.set(Metadata.RESOURCE_NAME_KEY, fileName);

        // Output the img tag
        AttributesImpl imgAttributes = new AttributesImpl();
        imgAttributes.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName);
        imgAttributes.addAttribute("", "alt", "alt", "CDATA", fileName);
        xhtml.startElement("img", imgAttributes);
        xhtml.endElement("img");

        //Do we only want to process unique COSObject ids?
        //If so, have we already processed this one?
        if (config.getExtractUniqueInlineImagesOnly()) {
            if (processedInlineImages.containsKey(stream)) {
                continue;
            }
            processedInlineImages.put(stream, imageNumber);
        }

        embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
                TikaCoreProperties.EmbeddedResourceType.INLINE.toString());

        if (!embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
            continue;
        }

        ByteArrayOutputStream imageBytes = new ByteArrayOutputStream();
        //TODO: handle image.getMetadata()?
        try {
            writeToBuffer(image, suffix, imageBytes);
        } catch (IOException e) {
            // A failed image write is recorded on the parent metadata; move on to the next XObject.
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
            continue;
        }
        try (InputStream embeddedIs = TikaInputStream.get(imageBytes.toByteArray())) {
            embeddedDocumentExtractor.parseEmbedded(embeddedIs, new EmbeddedContentHandler(xhtml),
                    embeddedMetadata, false);
        } catch (IOException e) {
            handleCatchableIOE(e);
        }
    }
}