List of usage examples for org.apache.pdfbox.cos COSObject getObject
public COSBase getObject()
From source file:ReducePDFSize.java
License:Apache License
public static void main(String[] args) throws IOException { if (2 != args.length) { throw new RuntimeException("arg0 must be input file, org1 must be output file"); }/*from ww w . j av a 2s. c o m*/ String in = args[0]; String out = args[1]; PDDocument doc = null; try { doc = PDDocument.load(new File(in)); doc.setAllSecurityToBeRemoved(true); for (COSObject cosObject : doc.getDocument().getObjects()) { COSBase base = cosObject.getObject(); // if it's a stream: decode it, then re-write it using FLATE_DECODE if (base instanceof COSStream) { COSStream stream = (COSStream) base; byte[] bytes; try { bytes = new PDStream(stream).toByteArray(); } catch (IOException ex) { // NOTE: original example code from PDFBox just logged & "continue;"d here, 'skipping' this stream. // If this type of failure ever happens, we can (perhaps) consider (re)ignoring this type of failure? // // IIUC then that will leave the original (non-decoded / non-flated) stream in place? throw new RuntimeException("can't serialize byte[] from: " + cosObject.getObjectNumber() + " " + cosObject.getGenerationNumber() + " obj: " + ex.getMessage(), ex); } stream.removeItem(COSName.FILTER); OutputStream streamOut = stream.createOutputStream(COSName.FLATE_DECODE); streamOut.write(bytes); streamOut.close(); } } doc.getDocumentCatalog(); doc.save(out); } finally { if (doc != null) { doc.close(); } } }
From source file:at.gv.egiz.pdfas.lib.impl.stamping.pdfbox.PDFBoxFont.java
License:EUPL
private PDFont findCachedFont(PDFBOXObject pdfObject, FontInfoCache fontInfo) { try {//from w w w .j av a 2s. c om if (pdfObject.getFontCache().containsKey(fontInfo.fontPath)) { return pdfObject.getFontCache().get(fontInfo.fontPath); } List<COSObject> cosObjects = pdfObject.getDocument().getDocument().getObjectsByType(COSName.FONT); //COSName cosFontName = COSName.getPDFName(fontInfo.fontName); //COSName cosFontFamily = COSName.getPDFName(fontInfo.fontFamily); Iterator<COSObject> cosObjectIt = cosObjects.iterator(); while (cosObjectIt.hasNext()) { COSObject cosObject = cosObjectIt.next(); COSDictionary baseObject = (COSDictionary) cosObject.getObject(); if (baseObject instanceof COSDictionary) { COSDictionary fontDictionary = (COSDictionary) baseObject; COSBase subType = cosObject.getItem(COSName.SUBTYPE); COSDictionary fontDescriptor = (COSDictionary) cosObject.getDictionaryObject(COSName.FONT_DESC); if (fontDescriptor != null) { String fontName = fontDescriptor.getNameAsString(COSName.FONT_NAME); String fontFamily = fontDescriptor.getNameAsString(COSName.FONT_FAMILY); logger.trace("Inspecting Font {} - {}", fontFamily, fontName); if (COSName.TRUE_TYPE.equals(subType)) { if (fontInfo.fontName != null && fontInfo.fontName.equals(fontName) && fontInfo.fontFamily != null && fontInfo.fontFamily.equals(fontFamily)) { // Found it! :) logger.info("Found Font {}", fontInfo.fontName); return new PDTrueTypeFont(fontDictionary); } } else { logger.debug("Font not a TTF"); } } } else { logger.debug("Font not a COSDictionary"); } } } catch (Exception e) { logger.info("Failed to load existing TTF fonts!", e); } return null; }
From source file:com.esri.geoportal.commons.pdf.PdfUtils.java
License:Apache License
/** * Reads metadata values from a PDF file. * // w w w. jav a2s .c o m * @param rawBytes the PDF to read * @param defaultTitle title to be used if the PDF metadata doesn't have one * @param geometryServiceUrl url of a <a href="https://developers.arcgis.com/rest/services-reference/geometry-service.htm">geometry service</a> for reprojecting coordinates. * * @return metadata properties or null if the PDF cannot be read. * * @throws IOException on parsing error */ public static Properties readMetadata(byte[] rawBytes, String defaultTitle, String geometryServiceUrl) throws IOException { Properties ret = new Properties(); // Attempt to read in the PDF file try (PDDocument document = PDDocument.load(rawBytes)) { // See if we can read the PDF if (!document.isEncrypted()) { // Get document metadata PDDocumentInformation info = document.getDocumentInformation(); if (info != null) { if (info.getTitle() != null) { ret.put(PROP_TITLE, info.getTitle()); } else { ret.put(PROP_TITLE, defaultTitle); } if (info.getSubject() != null) { ret.put(PROP_SUBJECT, info.getSubject()); } else { StringBuilder psudoSubject = new StringBuilder(""); psudoSubject.append("\nAuthor: " + info.getAuthor()); psudoSubject.append("\nCreator: " + info.getCreator()); psudoSubject.append("\nProducer: " + info.getProducer()); ret.put(PROP_SUBJECT, psudoSubject.toString()); } if (info.getModificationDate() != null) { ret.put(PROP_MODIFICATION_DATE, info.getModificationDate().getTime()); } else { ret.put(PROP_MODIFICATION_DATE, info.getCreationDate().getTime()); } } else { LOG.warn("Got null metadata for PDF file"); return null; } // Attempt to read in geospatial PDF data COSObject measure = document.getDocument().getObjectByType(COSName.getPDFName("Measure")); String bBox = null; if (measure != null) { // This is a Geospatial PDF (i.e. 
Adobe's standard) COSDictionary dictionary = (COSDictionary) measure.getObject(); float[] coords = ((COSArray) dictionary.getItem("GPTS")).toFloatArray(); bBox = generateBbox(coords); } else { PDPage page = document.getPage(0); if (page.getCOSObject().containsKey(COSName.getPDFName("LGIDict"))) { // This is a GeoPDF (i.e. TerraGo's standard) bBox = extractGeoPDFProps(page, geometryServiceUrl); } } if (bBox != null) { ret.put(PROP_BBOX, bBox); } } else { LOG.warn("Cannot read encrypted PDF file"); return null; } } catch (IOException ex) { LOG.error("Exception reading PDF", ex); throw ex; } return ret; }
From source file:cz.muni.pdfjbim.PdfImageExtractor.java
License:Apache License
/** * This method extracts images by going through all COSObjects pointed from xref table * @param is input stream containing PDF file * @param prefix output basename for images * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet/*from w w w . j a v a 2s . co m*/ * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfParser(InputStream is, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { // checking arguments and setting appropriate variables if (binarize == null) { binarize = false; } log.debug("Extracting images (binarize set to {})", binarize); InputStream inputStream = null; if (password != null) { try (ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream()) { PdfReader reader = new PdfReader(is, password.getBytes(StandardCharsets.UTF_8)); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); if (stamper != null) { stamper.close(); } inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { inputStream = is; } PDFParser parser = null; COSDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getDocument(); List<COSObject> objs = doc.getObjectsByType(COSName.XOBJECT); if (objs != null) { for (COSObject obj : objs) { COSBase subtype = obj.getItem(COSName.SUBTYPE); if (subtype.toString().equalsIgnoreCase("COSName{Image}")) { COSBase imageObj = obj.getObject(); COSBase cosNameObj = obj.getItem(COSName.NAME); String 
key; if (cosNameObj != null) { String cosNameKey = cosNameObj.toString(); int startOfKey = cosNameKey.indexOf("{") + 1; key = cosNameKey.substring(startOfKey, cosNameKey.length() - 1); } else { key = "im0"; } int objectNum = obj.getObjectNumber().intValue(); int genNum = obj.getGenerationNumber().intValue(); PDXObjectImage image = (PDXObjectImage) PDXObjectImage.createXObject(imageObj); PDStream pdStr = new PDStream(image.getCOSStream()); List<COSName> filters = pdStr.getFilters(); log.debug("Detected image with color depth: {} bits", image.getBitsPerComponent()); if (filters == null) { continue; } log.debug("Detected filters: {}", filters.toString()); if ((image.getBitsPerComponent() > 1) && (!binarize)) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE)) { log.info("This is LZWDecoded => skipping"); continue; } if (filters.contains(COSName.FLATE_DECODE)) { log.debug("FlateDecoded image detected"); } if (filters.contains(COSName.JBIG2_DECODE)) { if (skipJBig2Images) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } else { log.debug("JBIG2 image detected"); } } // detection of unsupported filters by pdfBox library if (filters.contains(COSName.JPX_DECODE)) { log.warn("Unsupported filter JPXDecode => skipping"); continue; } String name = getUniqueFileName(prefix, image.getSuffix()); log.info("Writing image: {}", name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); namesOfImages.add(name + "." 
+ image.getSuffix()); } } } } catch (IOException ex) { Tools.deleteFilesFromList(namesOfImages); throw new PdfRecompressionException("Unable to parse PDF document", ex); } catch (Exception ex) { Tools.deleteFilesFromList(namesOfImages); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * This method extracts images by going through all COSObjects pointed from xref table * @param is input stream containing PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet/*from w w w .j av a 2 s. c om*/ * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfParser(InputStream is, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { // checking arguments and setting appropriate variables if (binarize == null) { binarize = false; } InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(is, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { inputStream = is; } PDFParser parser = null; COSDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getDocument(); List<COSObject> objs = doc.getObjectsByType(COSName.XOBJECT); if (objs != null) { for (COSObject obj : objs) { COSBase subtype = obj.getItem(COSName.SUBTYPE); if (subtype.toString().equalsIgnoreCase("COSName{Image}")) { COSBase imageObj = obj.getObject(); COSBase cosNameObj = obj.getItem(COSName.NAME); String key; if (cosNameObj != null) { String cosNameKey = cosNameObj.toString(); int startOfKey = cosNameKey.indexOf("{") + 1; key = cosNameKey.substring(startOfKey, cosNameKey.length() - 
1); } else { key = "im0"; } int objectNum = obj.getObjectNumber().intValue(); int genNum = obj.getGenerationNumber().intValue(); PDXObjectImage image = (PDXObjectImage) PDXObjectImage.createXObject(imageObj); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if ((image.getBitsPerComponent() > 1) && (!binarize)) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { log.warn("Unsupported filter JPXDecode => skipping"); continue; } String name = getUniqueFileName(prefix, image.getSuffix()); log.info("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); namesOfImages.add(name + "." + image.getSuffix()); } // } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:eu.europa.ec.markt.dss.signature.pdf.pdfbox.PdfBoxArray.java
License:Open Source License
/**
 * Returns the unfiltered content of the stream that {@code val} refers to.
 *
 * <p>Only a {@link COSObject} whose referenced object is a {@link COSStream} is accepted.
 *
 * @param val the value to read; expected to be a {@code COSObject} wrapping a {@code COSStream}
 * @return the bytes read from the stream's unfiltered content
 * @throws IOException if the stream cannot be opened, read or closed
 * @throws RuntimeException if {@code val} does not lead to a stream
 */
private byte[] toBytes(COSBase val) throws IOException {
    COSStream cosStream = null;
    if (val instanceof COSObject) {
        final COSBase object = ((COSObject) val).getObject();
        if (object instanceof COSStream) {
            cosStream = (COSStream) object;
        }
    }

    if (cosStream == null) {
        // val may itself be null here, so its class is only looked up when it exists.
        throw new RuntimeException(
                "Cannot find value for " + val + " of class " + (val == null ? null : val.getClass()));
    }

    final java.io.InputStream unfiltered = cosStream.getUnfilteredStream();
    try {
        return DSSUtils.toByteArray(unfiltered);
    } finally {
        // Release the stream whether or not the read succeeded.
        unfiltered.close();
    }
}
From source file:modules.PDFFontDependencyExtractorModule.java
License:Apache License
public PDFFontResults extractFontList(File f) throws IOException, InvalidParameterException { PDDocument document;//from w w w . j a v a 2 s.com try { document = PDDocument.load(f); } catch (IOException x) { throw new InvalidParameterException("Not a PDF file"); } SortedSet<FontInformation> ret = new TreeSet<FontInformation>(new Comparator<FontInformation>() { @Override public int compare(FontInformation o1, FontInformation o2) { int a = o1.fontName.compareTo(o2.fontName); if (a != 0) return a; else return o1.fontType.compareTo(o2.fontType); } }); document.getDocumentCatalog().getAllPages(); // The code down here is easier as it gets all the fonts used in the // document. Still, this would inlcude unused fonts, so we get the fonts // page by page and add them to a Hash table. for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) { if (c == null || !(c.getObject() instanceof COSDictionary)) { continue; // System.out.println(c.getObject()); } COSDictionary fontDictionary = (COSDictionary) c.getObject(); // System.out.println(dic.getNameAsString(COSName.BASE_FONT)); // } // } // int pagen = document.getNumberOfPages(); // i=0; // for (int p=0;p<pagen;p++){ // PDPage page = (PDPage)pages.get(p); // PDResources res = page.findResources(); // //for each page resources // if (res==null) continue; // // get the font dictionary // COSDictionary fonts = (COSDictionary) // res.getCOSDictionary().getDictionaryObject( COSName.FONT ); // for( COSName fontName : fonts.keySet() ) { // COSObject font = (COSObject) fonts.getItem( fontName ); // // if the font has already been visited we ingore it // long objectId = font.getObjectNumber().longValue(); // if (ret.get(objectId)!=null) // continue; // if( font==null || ! 
(font.getObject() instanceof COSDictionary) ) // continue; // COSDictionary fontDictionary = (COSDictionary)font.getObject(); // Type MUSt be font if (!fontDictionary.getNameAsString(COSName.TYPE).equals("Font")) { continue; } // get the variables FontInformation fi = new FontInformation(); fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE); String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT); if (baseFont == null) { continue; } if (Arrays.binarySearch(standard14, baseFont) >= 0) { continue; } COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC); COSBase enc = fontDictionary.getItem(COSName.ENCODING); COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE); fontDictionary.getInt(COSName.FIRST_CHAR); fontDictionary.getInt(COSName.LAST_CHAR); String encoding; boolean toUnicode = uni != null; if (enc == null) { encoding = "standard14"; } if (enc instanceof COSString) { encoding = ((COSString) enc).getString(); } else { encoding = "table"; } fi.isSubset = false; boolean t = true; // Type one and TT can have subsets defineing the basename see 5.5.3 // pdfref 1.6 // if (fi.fontType.lastIndexOf(COSName.TYPE1.getName())!=-1 || // fi.fontType.equals(COSName.TRUE_TYPE.getName()) ) if (baseFont != null) { if (baseFont.length() > 6) { for (int k = 0; k < 6; k++) if (!Character.isUpperCase(baseFont.charAt(k))) { t = false; } if (baseFont.charAt(6) != '+') { t = false; } } else { t = false; } fi.isSubset = t; if (fi.isSubset) { fi.baseName = baseFont.substring(0, 6); baseFont = baseFont.substring(7); } } fi.fontFlags = 0; if (fi.fontType.equals(COSName.TYPE0.getName()) || fi.fontType.equals(COSName.TYPE3.getName())) { fi.isEmbedded = true; } if (fontDescriptor != null) { // in Type1 charset indicates font is subsetted if (fontDescriptor.getItem(COSName.CHAR_SET) != null) { fi.isSubset = true; } if (fontDescriptor.getItem(COSName.FONT_FILE) != null || fontDescriptor.getItem(COSName.FONT_FILE3) != null || 
fontDescriptor.getItem(COSName.FONT_FILE2) != null) { fi.isEmbedded = true; } fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags")); fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY); fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH); } fi.charset = encoding; fi.fontName = baseFont; fi.isToUnicode = toUnicode; fi.encoding = fontDictionary.getNameAsString(COSName.CID_TO_GID_MAP); ret.add(fi); } // for all fonts HashMultimap<String, FontInformation> m = HashMultimap.create(); for (FontInformation ff : ret) { m.put(ff.fontName, ff); } LinkedList<FontInformation> missing = new LinkedList<FontInformation>(); Set<String> k = m.keySet(); for (String kk : k) { Set<FontInformation> s = m.get(kk); if (s.size() < 1) { continue; } if (s.size() > 1) { boolean found = false; FontInformation ff = null; for (FontInformation fonti : s) { if (!fonti.isEmbedded) { ff = fonti; } else { found = true; } } if (!found) { missing.add(ff); } } else { FontInformation ff = s.iterator().next(); if (!ff.isEmbedded) { missing.add(ff); } } } // } // for all pages // Iterator<FontInformation> it = ret.iterator(); // FontInformation prev = null; // LinkedList<FontInformation> toDelete = new // LinkedList<FontInformation>(); // while (it.hasNext()) { // FontInformation current = it.next(); // // if (prev!= null && prev.fontName.equals(current.fontName) && // (prev.fontType.startsWith("CIDFontType") || // current.fontType.startsWith("CIDFontType"))) // toDelete.add(current); // prev = current; // } // // //ret.removeAll(toDelete); // FontInformation[] retArray =toDelete.toArray(new FontInformation[0]); // if (missing.size() == 0) { missing = null; } else { System.out.println("Found missing fonts: " + f); System.out.println(missing); } return new PDFFontResults(new LinkedList<FontInformation>(ret), missing); }
From source file:net.padaf.preflight.font.AbstractFontValidator.java
License:Apache License
/**
 * Abstract Constructor.
 *
 * <p>Stores the handler and the COS object, creates the font from the object's dictionary,
 * instantiates the matching font container and registers it with the handler.
 *
 * @param handler the handled document
 * @param cObj The cos object representing the font
 * @throws ValidationException when object creation fails; the underlying {@link IOException}
 *         is attached as the cause
 */
public AbstractFontValidator(DocumentHandler handler, COSObject cObj) throws ValidationException {
    try {
        this.handler = handler;
        this.cObj = cObj;
        this.fDictionary = (COSDictionary) cObj.getObject();
        this.pFont = PDFontFactory.createFont(fDictionary);
        this.fontContainer = instanciateContainer(this.pFont);
        this.handler.addFont(this.pFont.getCOSObject(), this.fontContainer);
    } catch (IOException e) {
        // Attach the original exception so its stack trace is not lost.
        ValidationException failure = new ValidationException(
                "Unable to instantiate a FontValidator object : " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}
From source file:net.padaf.preflight.font.FontValidatorFactory.java
License:Apache License
public FontValidator getFontValidator(COSObject cObj, DocumentHandler handler) throws ValidationException { COSDictionary dic = (COSDictionary) cObj.getObject(); String type = dic.getNameAsString(COSName.getPDFName(DICTIONARY_KEY_TYPE)); String subtype = dic.getNameAsString(COSName.getPDFName(DICTIONARY_KEY_SUBTYPE)); if ((type == null || "".equals(type)) || (subtype == null || "".equals(subtype))) { throw new ValidationException( "Type and/or Subtype keys are missing : " + ERROR_FONTS_DICTIONARY_INVALID); } else {//from www .jav a2 s.c o m if (FONT_DICTIONARY_VALUE_TRUETYPE.equals(subtype)) { return new TrueTypeFontValidator(handler, cObj); } else if (FONT_DICTIONARY_VALUE_MMTYPE.equals(subtype) || FONT_DICTIONARY_VALUE_TYPE1.equals(subtype)) { return new Type1FontValidator(handler, cObj); } else if (FONT_DICTIONARY_VALUE_TYPE3.equals(subtype)) { return new Type3FontValidator(handler, cObj); } else if (FONT_DICTIONARY_VALUE_COMPOSITE.equals(subtype)) { return new CompositeFontValidator(handler, cObj); } else if (FONT_DICTIONARY_VALUE_TYPE2.equals(subtype) || FONT_DICTIONARY_VALUE_TYPE1C.equals(subtype) || FONT_DICTIONARY_VALUE_TYPE0C.equals(subtype) || FONT_DICTIONARY_VALUE_TYPE0.equals(subtype)) { // ---- Font managed by a Composite font. // this dictionary will be checked by a CompositeFontValidator return null; } else { throw new ValidationException("Unknown font type : " + subtype); } } }
From source file:net.padaf.preflight.helpers.FileSpecificationValidationHelper.java
License:Apache License
@Override public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException { List<ValidationError> result = new ArrayList<ValidationError>(0); PDDocument pdfDoc = handler.getDocument(); COSDocument cDoc = pdfDoc.getDocument(); List<?> lCOSObj = cDoc.getObjects(); for (Object o : lCOSObj) { COSObject cObj = (COSObject) o; // ---- If this object represents a Stream // The Dictionary must contain the Length key COSBase cBase = cObj.getObject(); if (cBase instanceof COSDictionary) { COSDictionary dic = (COSDictionary) cBase; String type = dic.getNameAsString(COSName.getPDFName(DICTIONARY_KEY_TYPE)); if (FILE_SPECIFICATION_VALUE_TYPE.equals(type)) { // ---- It is a file specification result.addAll(validateFileSpecification(handler, cObj)); }/*from ww w . j av a 2 s . c o m*/ } } return result; }