List of usage examples for the org.apache.pdfbox.pdmodel.common.PDStream constructor
public PDStream(COSStream str)
From source file:cz.muni.pdfjbim.PdfImageExtractor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param prefix /*from w ww. ja v a2 s . c o m*/ * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet // * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException("pdfFile must be defined"); } InputStream inputStream = null; if (password != null) { try { log.debug("PDF probably encrypted, trying to decrypt using given password {}", password); ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream(); PdfReader reader = new PdfReader(pdfFile, password.getBytes(StandardCharsets.UTF_8)); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) 
and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.entrySet().iterator(); while (xobjIter.hasNext()) { Map.Entry entry = (Map.Entry) xobjIter.next(); String key = (String) entry.getKey(); PDXObject xobj = (PDXObject) entry.getValue(); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.entrySet().iterator(); while (imageIter.hasNext()) { Map.Entry imEntry = (Map.Entry) imageIter.next(); String imKey = (String) imEntry.getKey(); PDXObjectImage image = (PDXObjectImage) imEntry.getValue(); PDStream pdStr = new PDStream(image.getCOSStream()); List<COSName> filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1 && !binarize) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE)) { log.info("This is LZWDecoded => 
skipping"); continue; } if (filters.contains(COSName.JBIG2_DECODE)) { if (skipJBig2Images) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } else { log.debug("JBIG2 image detected"); } } // detection of unsupported filters by pdfBox library if (filters.contains(COSName.JPX_DECODE)) { log.info("Unsupported filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { Tools.deleteFilesFromList(namesOfImages); throw new PdfRecompressionException("Unable to parse PDF document", ex); } catch (RuntimeException ex) { Tools.deleteFilesFromList(namesOfImages); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * This method extracts images by going through all COSObjects pointed from xref table * @param is input stream containing PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet//from w ww .j a va 2 s . c om * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfParser(InputStream is, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { // checking arguments and setting appropriate variables if (binarize == null) { binarize = false; } InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(is, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { inputStream = is; } PDFParser parser = null; COSDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getDocument(); List<COSObject> objs = doc.getObjectsByType(COSName.XOBJECT); if (objs != null) { for (COSObject obj : objs) { COSBase subtype = obj.getItem(COSName.SUBTYPE); if (subtype.toString().equalsIgnoreCase("COSName{Image}")) { COSBase imageObj = obj.getObject(); COSBase cosNameObj = obj.getItem(COSName.NAME); String key; if (cosNameObj != null) { String cosNameKey = cosNameObj.toString(); int startOfKey = cosNameKey.indexOf("{") + 1; key = cosNameKey.substring(startOfKey, cosNameKey.length() - 
1); } else { key = "im0"; } int objectNum = obj.getObjectNumber().intValue(); int genNum = obj.getGenerationNumber().intValue(); PDXObjectImage image = (PDXObjectImage) PDXObjectImage.createXObject(imageObj); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if ((image.getBitsPerComponent() > 1) && (!binarize)) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { log.warn("Unsupported filter JPXDecode => skipping"); continue; } String name = getUniqueFileName(prefix, image.getSuffix()); log.info("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); namesOfImages.add(name + "." + image.getSuffix()); } // } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet/*from ww w . j a v a 2 s . com*/ * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String password, Set<Integer> pagesToProcess, Boolean silent, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException(pdfFile); } String prefix = null; InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(pdfFile, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { 
prefix = pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.keySet().iterator(); while (xobjIter.hasNext()) { String key = (String) xobjIter.next(); PDXObject xobj = (PDXObject) xobjs.get(key); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.keySet().iterator(); while (imageIter.hasNext()) { String imKey = (String) imageIter.next(); PDXObjectImage image = (PDXObjectImage) images.get(imKey); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.info("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { 
log.info("Unsupported filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:fi.nls.oskari.printout.printing.PDPageContentStream.java
License:Apache License
/** * Create a new PDPage content stream.// w ww . ja v a 2s.c o m * * @param document * The document the page is part of. * @param sourcePage * The page to write the contents to. * @param appendContent * Indicates whether content will be overwritten. If false all * previous content is deleted. * @param compress * Tell if the content stream should compress the page contents. * @param resetContext * Tell if the graphic context should be reseted. * @throws IOException * If there is an error writing to the page contents. */ public PDPageContentStream(PDDocument document, PDPage sourcePage, boolean appendContent, boolean compress, boolean resetContext) throws IOException { page = sourcePage; resources = page.getResources(); if (resources == null) { resources = new PDResources(); page.setResources(resources); } // Get the pdstream from the source page instead of creating a new one PDStream contents = sourcePage.getContents(); boolean hasContent = contents != null; // If request specifies the need to append to the document if (appendContent && hasContent) { // Create a pdstream to append new content PDStream contentsToAppend = new PDStream(document); // This will be the resulting COSStreamArray after existing and new // streams are merged COSStreamArray compoundStream = null; // If contents is already an array, a new stream is simply appended // to it if (contents.getStream() instanceof COSStreamArray) { compoundStream = (COSStreamArray) contents.getStream(); compoundStream.appendStream(contentsToAppend.getStream()); } else { // Creates the COSStreamArray and adds the current stream plus a // new one to it COSArray newArray = new COSArray(); newArray.add(contents.getCOSObject()); newArray.add(contentsToAppend.getCOSObject()); compoundStream = new COSStreamArray(newArray); } if (compress) { List<COSName> filters = new ArrayList<COSName>(); filters.add(COSName.FLATE_DECODE); contentsToAppend.setFilters(filters); } if (resetContext) { // create a new stream to encapsulate 
the existing stream PDStream saveGraphics = new PDStream(document); output = saveGraphics.createOutputStream(); // save the initial/unmodified graphics context saveGraphicsState(); close(); // ? if (compress) { List<COSName> filters = new ArrayList<COSName>(); filters.add(COSName.FLATE_DECODE); saveGraphics.setFilters(filters); } // insert the new stream at the beginning compoundStream.insertCOSStream(saveGraphics); } // Sets the compoundStream as page contents sourcePage.setContents(new PDStream(compoundStream)); output = contentsToAppend.createOutputStream(); if (resetContext) { // restore the initial/unmodified graphics context restoreGraphicsState(); } } else { if (hasContent) { LOG.warn("You are overwriting an existing content, you should use the append mode"); } contents = new PDStream(document); if (compress) { List<COSName> filters = new ArrayList<COSName>(); filters.add(COSName.FLATE_DECODE); contents.setFilters(filters); } sourcePage.setContents(contents); output = contents.createOutputStream(); } formatDecimal.setMaximumFractionDigits(10); formatDecimal.setGroupingUsed(false); }
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
/**
 * Copies the content of the given page into a new, standalone {@link PDStream}.
 *
 * @param page the page whose content is copied
 * @return a new stream holding a copy of the page content
 * @throws IOException if reading the page content or writing the copy fails
 */
private PDStream getContents(PDPage page) throws IOException {
    PDStream pdStream = new PDStream(new COSStream());
    OutputStream os = pdStream.createOutputStream();
    try {
        java.io.InputStream in = page.getContents();
        try {
            IOUtils.copy(in, os);
        } finally {
            // the page's content stream has to be released as well
            in.close();
        }
    } finally {
        // close even when the copy fails, otherwise the stream stays open
        os.close();
    }
    return pdStream;
}
From source file:org.apache.fop.render.pdf.pdfbox.PDFBoxAdapter.java
License:Apache License
/**
 * Walks the XObjects of the given page resources and, for each one that carries its own
 * font resources, merges those fonts into the page-level font dictionary and rewrites the
 * XObject's text through a {@code MergeFontsPDFWriter}.
 * Does nothing when there is no XObject dictionary or font merging is disabled.
 *
 * @param sourcePageResources resource dictionary of the source page
 * @param fontinfo font information handed to the writer
 * @param uniqueName generator of the unique names used for renamed font entries
 * @throws IOException if rewriting the text of an XObject fails
 */
private void mergeXObj(COSDictionary sourcePageResources, FontInfo fontinfo, UniqueName uniqueName)
        throws IOException {
    COSDictionary xobj = (COSDictionary) sourcePageResources.getDictionaryObject(COSName.XOBJECT);
    if (xobj == null || !pdfDoc.isMergeFontsEnabled()) {
        return;
    }

    for (Map.Entry<COSName, COSBase> xobjEntry : xobj.entrySet()) {
        COSObject reference = (COSObject) xobjEntry.getValue();
        COSStream stream = (COSStream) reference.getObject();

        COSDictionary xobjResources = (COSDictionary) stream.getDictionaryObject(COSName.RESOURCES);
        if (xobjResources == null) {
            continue;
        }
        COSDictionary src = (COSDictionary) xobjResources.getDictionaryObject(COSName.FONT);
        if (src == null) {
            continue;
        }

        COSDictionary pageFonts = (COSDictionary) sourcePageResources.getDictionaryObject(COSName.FONT);
        if (pageFonts != null) {
            // add only the fonts the page does not know yet, under their unique names
            for (Map.Entry<COSName, COSBase> fontEntry : src.entrySet()) {
                COSName fontKey = fontEntry.getKey();
                if (!pageFonts.keySet().contains(fontKey)) {
                    pageFonts.setItem(uniqueName.getName(fontKey), fontEntry.getValue());
                }
            }
        } else {
            // the page has no fonts of its own: take over the XObject's font dictionary
            sourcePageResources.setItem(COSName.FONT, src);
        }

        PDFWriter writer = new MergeFontsPDFWriter(src, fontinfo, uniqueName, parentFonts, 0);
        String rewritten = writer.writeText(new PDStream(stream));
        if (rewritten == null) {
            continue;
        }

        stream.removeItem(COSName.FILTER);
        newXObj.put(xobjEntry.getKey(), rewritten);

        // Rename every font entry of the XObject; iterating over an array snapshot of the
        // keys allows the dictionary to be modified inside the loop.
        Object[] originalKeys = src.keySet().toArray();
        for (Object keyObject : originalKeys) {
            COSName name = (COSName) keyObject;
            src.setItem(uniqueName.getName(name), src.getItem(name));
            src.removeItem(name);
        }
    }
}
From source file:org.lockss.pdf.pdfbox.PdfBoxTokenStream.java
License:Open Source License
/**
 * <p>
 * Factory shortcut: builds a fresh {@link PDStream} from the PDFBox document that the
 * page of this token stream belongs to.
 * </p>
 *
 * @return A newly created {@link PDStream} instance.
 * @since 1.56
 */
protected PDStream makeNewPdStream() {
    final PDStream freshStream = new PDStream(pdfBoxPage.pdfBoxDocument.pdDocument);
    return freshStream;
}
From source file:pdf.PdfBuilder.java
public void build(Stammdaten sta) { try (InputStream is = getClass().getResourceAsStream("sta.pdf"); //get pdfobject PDDocument pdfTemplate = PDDocument.load(is)) { PDDocumentCatalog docCatalog = pdfTemplate.getDocumentCatalog(); PDAcroForm acroForm = docCatalog.getAcroForm(); //Set Data /*//from ww w. jav a 2 s . c o m acroForm.getField(PLZ).setValue(sta.getPostleitzahl()); acroForm.getField(SITZ).setValue(sta.getSitz()); acroForm.getField(STAAT).setValue(sta.getStaat()); acroForm.getField(ISO).setValue(sta.getISO()); acroForm.getField(BERUF).setValue(sta.getBeruf()); acroForm.getField(GEBURTSDATUM).setValue(sta.getGeburtsdatum()); acroForm.getField(KREDITNEHMER1).setValue(sta.getKreditnehmer()); */ acroForm.getField(STAAT).setValue("test"); // Generate new File // TODO: change uuid to timestamp File saveFile = new File("STA" + UUID.randomUUID() + ".pdf"); if (saveFile.createNewFile()) { System.out.println("File is created!"); } else { System.out.println("File already exists."); } // Save edited file pdfTemplate.save(saveFile); PDStream ps = new PDStream(pdfTemplate); InputStream finalPDF = ps.createInputStream(); ; } catch (IOException | COSVisitorException ex) { Logger.getLogger(PdfBuilder.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:Utilities.GlobalVar.java
public static void updateSeqNum(PDDocument doc, String cycle) throws IOException { int sequenceNum = 1; List pages = doc.getDocumentCatalog().getAllPages(); for (int i = 0; i < pages.size(); i++) { PDPage page = (PDPage) pages.get(i); PDStream contents = page.getContents(); PDFStreamParser parser = new PDFStreamParser(contents.getStream()); parser.parse();//from w ww .j a va 2 s . co m List tokens = parser.getTokens(); for (int j = 0; j < tokens.size(); j++) { Object next = tokens.get(j); if (next instanceof PDFOperator) { PDFOperator op = (PDFOperator) next; // Tj and TJ are the two operators that display strings in a PDF if (op.getOperation().equals("Tj")) { // Tj takes one operator and that is the string // to display so lets update that operator COSString previous = (COSString) tokens.get(j - 1); String string = previous.getString(); // System.out.println(string); // System.out.println(string.charAt(5)); if (string.contains("/0")) { String seq = cycle + "/" + GlobalVar.globalCountGenerator5Digit(sequenceNum); string = string.replaceFirst(string, seq); previous.reset(); previous.append(string.getBytes("ISO-8859-1")); sequenceNum++; break; } //Word you want to change. 
Currently this code changes word "Solr" to "Solr123" previous.reset(); previous.append(string.getBytes("ISO-8859-1")); } else if (op.getOperation().equals("TJ")) { COSArray previous = (COSArray) tokens.get(j - 1); for (int k = 0; k < previous.size(); k++) { Object arrElement = previous.getObject(k); if (arrElement instanceof COSString) { COSString cosString = (COSString) arrElement; String string = cosString.getString(); // System.out.println(string); if (string.contains("/00")) { String seq = cycle + "/" + GlobalVar.globalCountGenerator5Digit(sequenceNum); string = string.replaceFirst(string, seq); cosString.reset(); cosString.append(string.getBytes("ISO-8859-1")); sequenceNum++; break; } // Currently this code changes word "Solr" to "Solr123" cosString.reset(); cosString.append(string.getBytes("ISO-8859-1")); // break; } } } } } // now that the tokens are updated we will replace the page content stream. PDStream updatedStream = new PDStream(doc); OutputStream out = updatedStream.createOutputStream(); ContentStreamWriter tokenWriter = new ContentStreamWriter(out); tokenWriter.writeTokens(tokens); page.setContents(updatedStream); } }