List of usage examples for the org.apache.pdfbox.cos.COSObject constructor COSObject(COSBase)
public COSObject(COSBase object)
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
/** * visitFromStream method comment./*from w w w.j ava 2 s.c o m*/ * * @param obj The object that is being visited. * * @throws COSVisitorException If there is an exception while visiting this * object. * * @return null */ public Object visitFromStream(COSStream obj) throws COSVisitorException { InputStream input = null; try { if (willEncrypt) { document.getSecurityHandler().encryptStream(obj, currentObjectKey.getNumber(), currentObjectKey.getGeneration()); } COSObject lengthObject = null; // check if the length object is required to be direct, like in // a cross reference stream dictionary COSBase lengthEntry = obj.getDictionaryObject(COSName.LENGTH); String type = obj.getNameAsString(COSName.TYPE); if (lengthEntry != null && lengthEntry.isDirect() || "XRef".equals(type)) { // the length might be the non encoded length, // set the real one as direct object COSInteger cosInteger = COSInteger.get(obj.getFilteredLength()); cosInteger.setDirect(true); obj.setItem(COSName.LENGTH, cosInteger); } else { // make the length an implicit indirect object // set the length of the stream and write stream dictionary lengthObject = new COSObject(null); obj.setItem(COSName.LENGTH, lengthObject); } input = obj.getFilteredStream(); //obj.accept(this); // write the stream content visitFromDictionary(obj); getStandardOutput().write(STREAM); getStandardOutput().writeCRLF(); byte[] buffer = new byte[1024]; int amountRead = 0; int totalAmountWritten = 0; while ((amountRead = input.read(buffer, 0, 1024)) != -1) { getStandardOutput().write(buffer, 0, amountRead); totalAmountWritten += amountRead; } // set the length as an indirect object if (lengthObject != null) { lengthObject.setObject(COSInteger.get(totalAmountWritten)); } getStandardOutput().writeCRLF(); getStandardOutput().write(ENDSTREAM); getStandardOutput().writeEOL(); return null; } catch (Exception e) { throw new COSVisitorException(e); } finally { if (input != null) { try { input.close(); } catch (IOException e) { throw new 
COSVisitorException(e); } } } }
From source file:cz.muni.pdfjbim.PdfImageExtractor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param prefix //from w w w . j a v a 2s.c o m * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet // * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String prefix, String password, Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException("pdfFile must be defined"); } InputStream inputStream = null; if (password != null) { try { log.debug("PDF probably encrypted, trying to decrypt using given password {}", password); ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream(); PdfReader reader = new PdfReader(pdfFile, password.getBytes(StandardCharsets.UTF_8)); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) 
and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.entrySet().iterator(); while (xobjIter.hasNext()) { Map.Entry entry = (Map.Entry) xobjIter.next(); String key = (String) entry.getKey(); PDXObject xobj = (PDXObject) entry.getValue(); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.entrySet().iterator(); while (imageIter.hasNext()) { Map.Entry imEntry = (Map.Entry) imageIter.next(); String imKey = (String) imEntry.getKey(); PDXObjectImage image = (PDXObjectImage) imEntry.getValue(); PDStream pdStr = new PDStream(image.getCOSStream()); List<COSName> filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1 && !binarize) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE)) { log.info("This is LZWDecoded => 
skipping"); continue; } if (filters.contains(COSName.JBIG2_DECODE)) { if (skipJBig2Images) { log.warn("Allready compressed according to JBIG2 standard => skipping"); continue; } else { log.debug("JBIG2 image detected"); } } // detection of unsupported filters by pdfBox library if (filters.contains(COSName.JPX_DECODE)) { log.info("Unsupported filter JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { Tools.deleteFilesFromList(namesOfImages); throw new PdfRecompressionException("Unable to parse PDF document", ex); } catch (RuntimeException ex) { Tools.deleteFilesFromList(namesOfImages); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:cz.muni.pdfjbim.PdfImageProcessor.java
License:Apache License
/** * @deprecated -- do not use doesn't work properly yet * This method extracts images by going through PDF tree structure * @param pdfFile name of input PDF file * @param password password for access to PDF if needed * @param pagesToProcess list of pages which should be processed if null given => processed all pages * -- not working yet// w ww.j a v a 2 s. co m * @param silent -- if true error messages are not written to output otherwise they are * @param binarize -- enables processing of nonbitonal images as well (LZW is still not * processed because of output with inverted colors) * @throws PdfRecompressionException if problem to extract images from PDF */ public void extractImagesUsingPdfObjectAccess(String pdfFile, String password, Set<Integer> pagesToProcess, Boolean silent, Boolean binarize) throws PdfRecompressionException { if (binarize == null) { binarize = false; } // checking arguments and setting appropriate variables if (pdfFile == null) { throw new IllegalArgumentException(pdfFile); } String prefix = null; InputStream inputStream = null; if (password != null) { try { ByteArrayOutputStream decryptedOutputStream = null; PdfReader reader = new PdfReader(pdfFile, password.getBytes()); PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream); stamper.close(); inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray()); } catch (DocumentException ex) { throw new PdfRecompressionException(ex); } catch (IOException ex) { throw new PdfRecompressionException("Reading file caused exception", ex); } } else { try { inputStream = new FileInputStream(pdfFile); } catch (FileNotFoundException ex) { throw new PdfRecompressionException("File wasn't found", ex); } } // if prefix is not set then prefix set to name of pdf without .pdf // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed // and this string set as prefix if ((prefix == null) && (pdfFile.length() > 4)) { prefix = 
pdfFile.substring(0, pdfFile.length() - 4); } PDFParser parser = null; PDDocument doc = null; try { parser = new PDFParser(inputStream); parser.parse(); doc = parser.getPDDocument(); AccessPermission accessPermissions = doc.getCurrentAccessPermission(); if (!accessPermissions.canExtractContent()) { throw new PdfRecompressionException("Error: You do not have permission to extract images."); } // going page by page List pages = doc.getDocumentCatalog().getAllPages(); for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) { if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) { continue; } PDPage page = (PDPage) pages.get(pageNumber); PDResources resources = page.getResources(); Map xobjs = resources.getXObjects(); if (xobjs != null) { Iterator xobjIter = xobjs.keySet().iterator(); while (xobjIter.hasNext()) { String key = (String) xobjIter.next(); PDXObject xobj = (PDXObject) xobjs.get(key); Map images; if (xobj instanceof PDXObjectForm) { PDXObjectForm xform = (PDXObjectForm) xobj; images = xform.getResources().getImages(); } else { images = resources.getImages(); } // reading images from each page and saving them to file if (images != null) { Iterator imageIter = images.keySet().iterator(); while (imageIter.hasNext()) { String imKey = (String) imageIter.next(); PDXObjectImage image = (PDXObjectImage) images.get(imKey); PDStream pdStr = new PDStream(image.getCOSStream()); List filters = pdStr.getFilters(); if (image.getBitsPerComponent() > 1) { log.info("It is not a bitonal image => skipping"); continue; } // at this moment for preventing bad output (bad coloring) from LZWDecode filter if (filters.contains(COSName.LZW_DECODE.getName())) { log.info("This is LZWDecoded => skipping"); continue; } // detection of unsupported filters by pdfBox library if (filters.contains("JBIG2Decode")) { log.info("Allready compressed according to JBIG2 standard => skipping"); continue; } if (filters.contains("JPXDecode")) { log.info("Unsupported filter 
JPXDecode => skipping"); continue; } COSObject cosObj = new COSObject(image.getCOSObject()); int objectNum = cosObj.getObjectNumber().intValue(); int genNum = cosObj.getGenerationNumber().intValue(); log.debug(objectNum + " " + genNum + " obj"); String name = getUniqueFileName(prefix + imKey, image.getSuffix()); log.debug("Writing image:" + name); image.write2file(name); PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(), image.getHeight(), objectNum, genNum); originalImageInformations.add(pdfImageInfo); log.debug(pdfImageInfo.toString()); namesOfImages.add(name + "." + image.getSuffix()); } } } } } } catch (IOException ex) { throw new PdfRecompressionException("Unable to parse PDF document", ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { throw new PdfRecompressionException(ex); } } } }
From source file:net.padaf.preflight.utils.TestCOSUtils.java
License:Apache License
@Test public void testIsInteger() { try {//from w ww .ja v a2 s .com COSObject co = new COSObject(new COSInteger(10)); co.setGenerationNumber(new COSInteger(0)); co.setObjectNumber(new COSInteger(10)); assertFalse(COSUtils.isInteger(co, new IOCOSDocument())); COSDocument doc = new COSDocument(); doc.setXRef(new COSObjectKey(co), 1000); COSUtils.isInteger(co, doc); doc.close(); } catch (IOException e) { fail(e.getMessage()); } }
From source file:net.padaf.preflight.utils.TestCOSUtils.java
License:Apache License
/**
 * Wraps a {@code COSFloat} in a {@code COSObject} numbered 10/0 and checks
 * that {@code COSUtils.isFloat} is false against a fresh
 * {@code IOCOSDocument}. The check is then repeated (result not asserted)
 * against a {@code COSDocument} in which the object's key was registered
 * through {@code setXRef} with the value 1000.
 */
@Test
public void testIsFloat() {
    try {
        final COSObject indirect = new COSObject(new COSFloat(10.0f));
        indirect.setGenerationNumber(new COSInteger(0));
        indirect.setObjectNumber(new COSInteger(10));

        final boolean matched = COSUtils.isFloat(indirect, new IOCOSDocument());
        assertFalse(matched);

        final COSDocument xrefDocument = new COSDocument();
        xrefDocument.setXRef(new COSObjectKey(indirect), 1000);
        COSUtils.isFloat(indirect, xrefDocument);
        xrefDocument.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}
From source file:net.padaf.preflight.utils.TestCOSUtils.java
License:Apache License
@Test public void testIsString() { try {// w w w . j a v a2 s .co m COSObject co = new COSObject(new COSString("")); co.setGenerationNumber(new COSInteger(0)); co.setObjectNumber(new COSInteger(10)); assertFalse(COSUtils.isString(co, new IOCOSDocument())); COSDocument doc = new COSDocument(); doc.setXRef(new COSObjectKey(co), 1000); COSUtils.isString(co, doc); doc.close(); } catch (IOException e) { fail(e.getMessage()); } }
From source file:net.padaf.preflight.utils.TestCOSUtils.java
License:Apache License
/**
 * Wraps a {@code COSStream} (constructed with a null argument) in a
 * {@code COSObject} numbered 10/0 and checks that {@code COSUtils.isStream}
 * is false against a fresh {@code IOCOSDocument}. The check is then repeated
 * (result not asserted) against a {@code COSDocument} in which the object's
 * key was registered through {@code setXRef} with the value 1000.
 */
@Test
public void testIsStream() {
    try {
        final COSObject indirect = new COSObject(new COSStream(null));
        indirect.setGenerationNumber(new COSInteger(0));
        indirect.setObjectNumber(new COSInteger(10));

        final boolean matched = COSUtils.isStream(indirect, new IOCOSDocument());
        assertFalse(matched);

        final COSDocument xrefDocument = new COSDocument();
        xrefDocument.setXRef(new COSObjectKey(indirect), 1000);
        COSUtils.isStream(indirect, xrefDocument);
        xrefDocument.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}
From source file:net.padaf.preflight.utils.TestCOSUtils.java
License:Apache License
@Test public void testIsDictionary() { try {// ww w . j a v a2s . c o m COSObject co = new COSObject(new COSDictionary()); co.setGenerationNumber(new COSInteger(0)); co.setObjectNumber(new COSInteger(10)); assertFalse(COSUtils.isDictionary(co, new IOCOSDocument())); COSDocument doc = new COSDocument(); doc.setXRef(new COSObjectKey(co), 1000); COSUtils.isDictionary(co, doc); doc.close(); } catch (IOException e) { fail(e.getMessage()); } }
From source file:net.padaf.preflight.utils.TestCOSUtils.java
License:Apache License
@Test public void testIsArray() { try {//from w w w.jav a2s . c o m COSObject co = new COSObject(new COSArray()); co.setGenerationNumber(new COSInteger(0)); co.setObjectNumber(new COSInteger(10)); assertFalse(COSUtils.isArray(co, new IOCOSDocument())); COSDocument doc = new COSDocument(); doc.setXRef(new COSObjectKey(co), 1000); COSUtils.isArray(co, doc); doc.close(); } catch (IOException e) { fail(e.getMessage()); } }
From source file:org.apache.fop.render.pdf.StructureTreeMergerTestCase.java
License:Apache License
@Test public void testCheckNullCOSObject() throws IOException { setUp();//ww w . java 2s .c o m PDDocument doc = PDDocument.load(new File(getClass().getResource(BrokenLink).getFile())); PDPage srcPage = doc.getPage(0); PageParentTreeFinder finder = new PageParentTreeFinder(srcPage); COSArray markedContentParents = finder.getPageParentTreeArray(doc); COSObject nullObj = new COSObject(null); nullObj.setObjectNumber(100); nullObj.setGenerationNumber(0); PDFStructElem elem = new PDFStructElem(); elem.setObjectNumber(2); COSObject parent = (COSObject) markedContentParents.get(1); COSArray kids = (COSArray) parent.getDictionaryObject(COSName.K); COSDictionary kid = (COSDictionary) kids.get(1); kid.setItem(COSName.OBJ, nullObj); adapter = new PDFBoxAdapter(pdfPage, new HashMap(), new HashMap<Integer, PDFArray>()); PDFLogicalStructureHandler handler = setUpPDFLogicalStructureHandler(); StructureTreeMerger merger = new StructureTreeMerger(elem, handler, adapter, srcPage); merger.copyStructure(markedContentParents); PDFArray array = handler.getPageParentTree(); PDFStructElem parentElem = (PDFStructElem) array.get(1); PDFDictionary objrDict = (PDFDictionary) parentElem.getKids().get(1); Assert.assertNull(objrDict.get("Obj")); }