List of usage examples for org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject#getSuffix()
@Override
public String getSuffix()
From source file:at.gv.egiz.pdfas.lib.impl.pdfbox2.placeholder.SignaturePlaceholderExtractor.java
License:EUPL
/**
 * Checks whether an image is a QR-code placeholder for a signature.
 *
 * <p>The image is decoded as a QR code. If the decoded text starts with
 * {@code QR_PLACEHOLDER_IDENTIFIER}, the text is split on {@code ;} and every part after
 * the first is read as a {@code key=value} pair. Keys are matched case-insensitively
 * against the id, profile, signature-key and type keys of {@link SignaturePlaceholderData};
 * parts that do not split into exactly two pieces on {@code =} are logged and skipped.
 *
 * @param image the image XObject to inspect
 * @return the placeholder data, or {@code null} if the image could not be obtained, is
 *         smaller than 10 pixels in either dimension, could not be decoded as a QR code,
 *         or its text does not start with the placeholder identifier
 * @throws IOException if obtaining the image from the XObject fails
 */
private SignaturePlaceholderData checkImage(PDImageXObject image) throws IOException {
    BufferedImage bimg = image.getImage();
    if (bimg == null) {
        String type = image.getSuffix();
        if (type != null) {
            // Locale.ROOT keeps the upper-casing independent of the JVM default locale
            // (e.g. "tiff" must not become "TİFF" under a Turkish locale).
            type = type.toUpperCase(java.util.Locale.ROOT) + " images";
        } else {
            type = "Image type";
        }
        logger.info("Unable to extract image for QRCode analysis. " + type
                + " not supported. Add additional JAI Image filters to your classpath. Refer to https://jai.dev.java.net. Skipping image.");
        return null;
    }
    if (bimg.getHeight() < 10 || bimg.getWidth() < 10) {
        logger.debug("Image too small for QRCode. Skipping image.");
        return null;
    }

    LuminanceSource source = new BufferedImageLuminanceSource(bimg);
    BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));
    // Start time of the decode attempt; only used for the debug message on failure.
    long before = System.currentTimeMillis();
    try {
        // Hashtable/Vector are kept as in the original call so the hint types passed to
        // decode(...) stay unchanged.
        Hashtable<DecodeHintType, Object> hints = new Hashtable<DecodeHintType, Object>();
        Vector<BarcodeFormat> formats = new Vector<BarcodeFormat>();
        formats.add(BarcodeFormat.QR_CODE);
        hints.put(DecodeHintType.POSSIBLE_FORMATS, formats);

        Result result = new MultiFormatReader().decode(bitmap, hints);
        String text = result.getText();
        if (text == null) {
            return null;
        }
        if (!text.startsWith(QR_PLACEHOLDER_IDENTIFIER)) {
            logger.warn("QR-Code found but does not start with \"" + QR_PLACEHOLDER_IDENTIFIER
                    + "\". Ignoring QR placeholder.");
            return null;
        }

        String profile = null;
        String type = null;
        String sigKey = null;
        String id = null;
        String[] data = text.split(";");
        // data[0] is the identifier part itself; parameters start at index 1.
        for (int i = 1; i < data.length; i++) {
            String kvPair = data[i];
            String[] kv = kvPair.split("=");
            if (kv.length != 2) {
                logger.debug("Invalid parameter in placeholder data: " + kvPair);
                continue;
            }
            String key = kv[0];
            String value = kv[1];
            if (key.equalsIgnoreCase(SignaturePlaceholderData.ID_KEY)) {
                id = value;
            } else if (key.equalsIgnoreCase(SignaturePlaceholderData.PROFILE_KEY)) {
                profile = value;
            } else if (key.equalsIgnoreCase(SignaturePlaceholderData.SIG_KEY_KEY)) {
                sigKey = value;
            } else if (key.equalsIgnoreCase(SignaturePlaceholderData.TYPE_KEY)) {
                type = value;
            }
        }
        return new SignaturePlaceholderData(profile, type, sigKey, id);
    } catch (ReaderException re) {
        if (logger.isDebugEnabled()) {
            logger.debug(
                    "Could not decode - not a placeholder. needed: " + (System.currentTimeMillis() - before));
        }
        // NotFoundException is only reported through the debug message above;
        // every other reader failure is additionally logged at info level.
        if (!(re instanceof NotFoundException)) {
            if (logger.isInfoEnabled()) {
                logger.info("Failed to decode image", re);
            }
        }
    } catch (ArrayIndexOutOfBoundsException e) {
        if (logger.isInfoEnabled()) {
            logger.info("Failed to decode image. Probably a zxing bug", e);
        }
    }
    return null;
}
From source file:com.fngry.monk.biz.demo.pdf.pdfbox.PrintImageLocations.java
License:Apache License
/** * This is used to handle an operation.//from ww w . j a va2 s . co m * * @param operator The operation to perform. * @param operands The list of arguments. * * @throws IOException If there is an error processing the operation. */ @Override protected void processOperator(Operator operator, List<COSBase> operands) throws IOException { String operation = operator.getName(); if ("Do".equals(operation)) { COSName objectName = (COSName) operands.get(0); PDXObject xobject = getResources().getXObject(objectName); if (xobject instanceof PDImageXObject) { PDImageXObject image = (PDImageXObject) xobject; if ("png".equals(image.getSuffix())) { return; } int imageWidth = image.getWidth(); int imageHeight = image.getHeight(); // System.out.println("Found image [" + objectName.getName() + "]"); Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix(); float imageXScale = ctmNew.getScalingFactorX(); float imageYScale = ctmNew.getScalingFactorY(); // position in user space units. 1 unit = 1/72 inch at 72 dpi // System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY() + " in user space units"); // // raw size in pixels // System.out.println("raw image size = " + imageWidth + ", " + imageHeight + " in pixels"); // // displayed size in user space units // System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in user space units"); // displayed size in inches at 72 dpi rendering // imageXScale /= 72; // imageYScale /= 72; // System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in inches at 72 dpi rendering"); // // displayed size in millimeters at 72 dpi rendering // imageXScale *= 25.4; // imageYScale *= 25.4; // System.out.println("displayed size = " + imageXScale + ", " + imageYScale + " in millimeters at 72 dpi rendering"); System.out.println(); // BufferedImage bufferImage = image.getImage(); // ByteArrayOutputStream os = new ByteArrayOutputStream(); // 
ImageIO.write(bufferImage, image.getSuffix(), os); // // String fileName = this.pageName + "_" + objectName.getName() + "." + image.getSuffix(); // Path outputFile = new File("/Users/gaorongyu/Downloads/temp/" // + this.pageName + "_" + objectName.getName() + "." + image.getSuffix()).toPath(); ImageInfo imageInfo = new ImageInfo((int) ctmNew.getTranslateX(), (int) ctmNew.getTranslateY(), (int) imageXScale, (int) imageYScale); imageInfo.setImage(image); imageInfo.setObjectName(objectName); imageInfoList.add(imageInfo); // java.nio.file.Files.copy(new ByteArrayInputStream(os.toByteArray()), outputFile); } else if (xobject instanceof PDFormXObject) { PDFormXObject form = (PDFormXObject) xobject; showForm(form); } } else { super.processOperator(operator, operands); } }
From source file:org.apache.tika.parser.pdf.EnhancedPDF2XHTML.java
License:Apache License
private void extractImages(PDResources resources) throws SAXException, IOException { if (resources == null || config.getExtractInlineImages() == false) { return;/* w w w .j a v a 2 s.c o m*/ } Iterable<COSName> cosIterable = resources.getXObjectNames(); if (cosIterable == null) { return; } for (COSName name : cosIterable) { PDXObject object = resources.getXObject(name); if (object instanceof PDFormXObject) { extractImages(((PDFormXObject) object).getResources()); } else if (object instanceof PDImageXObject) { PDImageXObject image = (PDImageXObject) object; Metadata metadata = new Metadata(); String extension = ""; if ("jpg".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); extension = ".jpg"; } else if ("tiff".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/tiff"); extension = ".tif"; } else if ("jpx".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/jpx"); extension = ".jpx"; } else if ("png".equalsIgnoreCase(image.getSuffix())) { metadata.set(Metadata.CONTENT_TYPE, "image/png"); extension = ".png"; } Integer imageNumber = processedInlineImages.get(name.getName()); if (imageNumber == null) { imageNumber = inlineImageCounter++; } String fileName = "image" + imageNumber + extension; metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); // Output the img tag AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName); attr.addAttribute("", "alt", "alt", "CDATA", fileName); handler.startElement("img", attr); handler.endElement("img"); //Do we only want to process unique COSObject ids? //If so, have we already processed this one? 
if (config.getExtractUniqueInlineImagesOnly() == true) { String cosObjectId = name.getName(); if (processedInlineImages.containsKey(cosObjectId)) { continue; } processedInlineImages.put(cosObjectId, imageNumber); } metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.INLINE.toString()); EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor(); if (extractor.shouldParseEmbedded(metadata)) { try { extractor.parseEmbedded(image.getCOSStream().getFilteredStream(), new EmbeddedContentHandler(handler), metadata, false); } catch (IOException e) { // could not extract this image, so just skip it... } } } } }
From source file:org.apache.tika.parser.pdf.PDF2XHTMLPureJava.java
License:Apache License
private void extractImages(PDResources resources, Set<COSBase> seenThisPage) throws SAXException, IOException { if (resources == null || config.getExtractInlineImages() == false) { return;/*from www . j a v a 2s .c o m*/ } for (COSName name : resources.getXObjectNames()) { PDXObject object = null; try { object = resources.getXObject(name); } catch (MissingImageReaderException e) { EmbeddedDocumentUtil.recordException(e, metadata); continue; } catch (IOException e) { EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata); continue; } if (object == null) { continue; } COSStream cosStream = object.getCOSObject(); if (seenThisPage.contains(cosStream)) { //avoid infinite recursion TIKA-1742 continue; } seenThisPage.add(cosStream); if (object instanceof PDFormXObject) { extractImages(((PDFormXObject) object).getResources(), seenThisPage); } else if (object instanceof PDImageXObject) { PDImageXObject image = (PDImageXObject) object; Metadata embeddedMetadata = new Metadata(); String extension = image.getSuffix(); //TODO remove this next block when upgrading to PDFBox 2.0.5. 
//See: https://issues.apache.org/jira/browse/PDFBOX-3634 if (extension == null) { extension = getJBIG2Suffix(image); } if (extension == null || extension.equals("png")) { embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/png"); extension = "png"; } else if (extension.equals("jpg")) { embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); } else if (extension.equals("tiff")) { embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/tiff"); extension = "tif"; } else if (extension.equals("jpx")) { embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/jp2"); } else if (extension.equals("jb2")) { embeddedMetadata.set(Metadata.CONTENT_TYPE, "image/x-jbig2"); } else { //TODO: determine if we need to add more image types // throw new RuntimeException("EXTEN:" + extension); } Integer imageNumber = processedInlineImages.get(cosStream); if (imageNumber == null) { imageNumber = inlineImageCounter++; } String fileName = "image" + imageNumber + "." + extension; embeddedMetadata.set(Metadata.RESOURCE_NAME_KEY, fileName); // Output the img tag AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName); attr.addAttribute("", "alt", "alt", "CDATA", fileName); xhtml.startElement("img", attr); xhtml.endElement("img"); //Do we only want to process unique COSObject ids? //If so, have we already processed this one? if (config.getExtractUniqueInlineImagesOnly() == true) { if (processedInlineImages.containsKey(cosStream)) { continue; } processedInlineImages.put(cosStream, imageNumber); } embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.INLINE.toString()); if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) { ByteArrayOutputStream buffer = new ByteArrayOutputStream(); try { //TODO: handle image.getMetadata()? 
try { writeToBuffer(image, extension, buffer); } catch (IOException e) { EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata); continue; } try (InputStream embeddedIs = TikaInputStream.get(buffer.toByteArray())) { embeddedDocumentExtractor.parseEmbedded(embeddedIs, new EmbeddedContentHandler(xhtml), embeddedMetadata, false); } } catch (IOException e) { handleCatchableIOE(e); } } } } }