Usage examples for org.apache.pdfbox.cos.COSObject.getItem
public COSBase getItem(COSName key)
From source file:at.gv.egiz.pdfas.lib.impl.stamping.pdfbox.PDFBoxFont.java
License:EUPL
private PDFont findCachedFont(PDFBOXObject pdfObject, FontInfoCache fontInfo) { try {/*from ww w.j a va 2 s. c o m*/ if (pdfObject.getFontCache().containsKey(fontInfo.fontPath)) { return pdfObject.getFontCache().get(fontInfo.fontPath); } List<COSObject> cosObjects = pdfObject.getDocument().getDocument().getObjectsByType(COSName.FONT); //COSName cosFontName = COSName.getPDFName(fontInfo.fontName); //COSName cosFontFamily = COSName.getPDFName(fontInfo.fontFamily); Iterator<COSObject> cosObjectIt = cosObjects.iterator(); while (cosObjectIt.hasNext()) { COSObject cosObject = cosObjectIt.next(); COSDictionary baseObject = (COSDictionary) cosObject.getObject(); if (baseObject instanceof COSDictionary) { COSDictionary fontDictionary = (COSDictionary) baseObject; COSBase subType = cosObject.getItem(COSName.SUBTYPE); COSDictionary fontDescriptor = (COSDictionary) cosObject.getDictionaryObject(COSName.FONT_DESC); if (fontDescriptor != null) { String fontName = fontDescriptor.getNameAsString(COSName.FONT_NAME); String fontFamily = fontDescriptor.getNameAsString(COSName.FONT_FAMILY); logger.trace("Inspecting Font {} - {}", fontFamily, fontName); if (COSName.TRUE_TYPE.equals(subType)) { if (fontInfo.fontName != null && fontInfo.fontName.equals(fontName) && fontInfo.fontFamily != null && fontInfo.fontFamily.equals(fontFamily)) { // Found it! :) logger.info("Found Font {}", fontInfo.fontName); return new PDTrueTypeFont(fontDictionary); } } else { logger.debug("Font not a TTF"); } } } else { logger.debug("Font not a COSDictionary"); } } } catch (Exception e) { logger.info("Failed to load existing TTF fonts!", e); } return null; }
From source file:cz.muni.pdfjbim.PdfImageExtractor.java
License:Apache License
/** * This method extracts images by going through all COSObjects pointed from xref table * @param is input stream containing PDF file * @param prefix output basename for images * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet//from w w w . ja v a 2s .c o m * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfParser(InputStream is, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { // checking arguments and setting appropriate variables if (binarize == null) { binarize = false; } log.debug("Extracting images (binarize set to {})", binarize); InputStream inputStream = null; if (password != null) { try (ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream()) { PdfReader reader = new PdfReader(is, password.getBytes(StandardCharsets.UTF_8)); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); if (stamper != null) { stamper.close(); } inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { inputStream = is; } PDFParser parser = null; COSDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getDocument(); List<COSObject> objs = doc.getObjectsByType(COSName.XOBJECT); if (objs != null) { for (COSObject obj : objs) { COSBase subtype = obj.getItem(COSName.SUBTYPE); if (subtype.toString().equalsIgnoreCase("COSName{Image}")) { COSBase imageObj = obj.getObject(); COSBase cosNameObj = obj.getItem(COSName.NAME); String key; 
if (cosNameObj != null) { String cosNameKey = cosNameObj.toString(); int startOfKey = cosNameKey.indexOf("{") + 1; key = cosNameKey.substring(startOfKey, cosNameKey.length() - 1); } else { key = "im0"; } int objectNum = obj.getObjectNumber().intValue(); int genNum = obj.getGenerationNumber().intValue(); PDXObjectImage image = (PDXObjectImage) PDXObjectImage.createXObject(imageObj); PDStream pdStr = new PDStream(image.getCOSStream()); List<COSName> filters = pdStr.getFilters(); log.debug("Detected image with color depth: {} bits", image.getBitsPerComponent()); if (filters == null) { continue; } log.debug("Detected filters: {}", filters.toString()); if ((image.getBitsPerComponent() > 1) && (!binarize)) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE)) { log.info("This is LZWDecoded => skipping"); continue; } if (filters.contains(COSName.FLATE_DECODE)) { log.debug("FlateDecoded image detected"); } if (filters.contains(COSName.JBIG2_DECODE)) { if (skipJBig2Images) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } else { log.debug("JBIG2 image detected"); } } // detection of unsupported filters by pdfBox library if (filters.contains(COSName.JPX_DECODE)) { log.warn("Unsupported filter JPXDecode => skipping"); continue; } String name = getUniqueFileName(prefix, image.getSuffix()); log.info("Writing image: {}", name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); namesOfImages.add(name + "." 
+ image.getSuffix()); } } } } catch (IOException ex) { Tools.deleteFilesFromList(namesOfImages); throw new PdfRecompressionException("Unable to parse PDF document", ex); } catch (Exception ex) { Tools.deleteFilesFromList(namesOfImages); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * This method extracts images by going through all COSObjects pointed from xref table * @param is input stream containing PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet/*from w w w .ja v a 2s . co m*/ * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfParser(InputStream is, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { // checking arguments and setting appropriate variables if (binarize == null) { binarize = false; } InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(is, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { inputStream = is; } PDFParser parser = null; COSDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getDocument(); List<COSObject> objs = doc.getObjectsByType(COSName.XOBJECT); if (objs != null) { for (COSObject obj : objs) { COSBase subtype = obj.getItem(COSName.SUBTYPE); if (subtype.toString().equalsIgnoreCase("COSName{Image}")) { COSBase imageObj = obj.getObject(); COSBase cosNameObj = obj.getItem(COSName.NAME); String key; if (cosNameObj != null) { String cosNameKey = cosNameObj.toString(); int startOfKey = cosNameKey.indexOf("{") + 1; key = cosNameKey.substring(startOfKey, cosNameKey.length() - 
1); } else { key = "im0"; } int objectNum = obj.getObjectNumber().intValue(); int genNum = obj.getGenerationNumber().intValue(); PDXObjectImage image = (PDXObjectImage) PDXObjectImage.createXObject(imageObj); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if ((image.getBitsPerComponent() > 1) && (!binarize)) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { log.warn("Unsupported filter JPXDecode => skipping"); continue; } String name = getUniqueFileName(prefix, image.getSuffix()); log.info("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); namesOfImages.add(name + "." + image.getSuffix()); } // } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMerger.java
License:Apache License
/**
 * Recursively attaches the structure elements reachable from {@code base} to
 * {@code parent}.
 *
 * <ul>
 *   <li>A dictionary is followed through its {@code K} entry, if it has one.</li>
 *   <li>An array is processed element by element.</li>
 *   <li>Anything else is expected to be a {@link COSObject}: a structure element is
 *       created and registered for it, looked up in {@code structElemCache} by object
 *       number, filled with the object's entries, linked to {@code parent}, and then
 *       used as the parent for the object's own {@code K} entry.</li>
 * </ul>
 *
 * @param base   the COS value to descend into
 * @param parent the structure element that receives the created kids
 * @throws IOException declared by this method; raised by the helpers it calls
 */
public void createDirectDescendants(COSBase base, PDFStructElem parent) throws IOException {
    if (base instanceof COSDictionary) {
        COSDictionary dictionary = (COSDictionary) base;
        boolean hasKids = dictionary.keySet().contains(COSName.K);
        if (hasKids) {
            createDirectDescendants(dictionary.getItem(COSName.K), parent);
        }
        return;
    }

    if (base instanceof COSArray) {
        COSArray kids = (COSArray) base;
        for (int index = 0; index < kids.size(); index++) {
            createDirectDescendants(kids.get(index), parent);
        }
        return;
    }

    assert base instanceof COSObject;
    COSObject reference = (COSObject) base;
    createAndRegisterStructElem(reference);

    PDFStructElem structElem = structElemCache.get((int) reference.getObjectNumber());
    copyElemEntries(reference, structElem);
    parent.addKid(structElem);
    structElem.setParent(parent);

    COSBase nestedKids = reference.getItem(COSName.K);
    if (nestedKids != null) {
        createDirectDescendants(nestedKids, structElem);
    }
}
From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMerger.java
License:Apache License
private void createParents(COSArray markedContentParents) throws IOException { for (COSBase entry : markedContentParents) { COSObject elemCos = (COSObject) entry; COSObject elemParent = (COSObject) elemCos.getItem(COSName.P); PDFStructElem elem = structElemCache.get((int) elemCos.getObjectNumber()); createParents(elemCos, elemParent, elem); }// www . j av a2s .c o m }
From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMergerUtil.java
License:Apache License
public static int findObjectPositionInKidsArray(COSObject kid) { COSObject parent = (COSObject) kid.getItem(COSName.P); COSBase kids = parent.getItem(COSName.K); if (kids instanceof COSArray) { COSArray kidsArray = (COSArray) kids; return kidsArray.indexOfObject(kid); } else {//from w w w . j av a2s.c o m return 0; } }