Example usage for org.apache.pdfbox.cos COSObject COSObject

Introduction

In this page you can find the example usage for org.apache.pdfbox.cos COSObject COSObject.

Prototype

public COSObject(COSBase object)

Source Link

Document

Constructor.

Usage

From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java

License:Apache License

/**
 * visitFromStream method comment./*from   w w w.j ava  2 s.c  o  m*/
 *
 * @param obj The object that is being visited.
 *
 * @throws COSVisitorException If there is an exception while visiting this
 * object.
 *
 * @return null
 */
public Object visitFromStream(COSStream obj) throws COSVisitorException {
    InputStream input = null;
    try {
        if (willEncrypt) {
            document.getSecurityHandler().encryptStream(obj, currentObjectKey.getNumber(),
                    currentObjectKey.getGeneration());
        }

        COSObject lengthObject = null;
        // check if the length object is required to be direct, like in
        // a cross reference stream dictionary
        COSBase lengthEntry = obj.getDictionaryObject(COSName.LENGTH);
        String type = obj.getNameAsString(COSName.TYPE);
        if (lengthEntry != null && lengthEntry.isDirect() || "XRef".equals(type)) {
            // the length might be the non encoded length,
            // set the real one as direct object
            COSInteger cosInteger = COSInteger.get(obj.getFilteredLength());
            cosInteger.setDirect(true);
            obj.setItem(COSName.LENGTH, cosInteger);

        } else {
            // make the length an implicit indirect object
            // set the length of the stream and write stream dictionary
            lengthObject = new COSObject(null);

            obj.setItem(COSName.LENGTH, lengthObject);
        }
        input = obj.getFilteredStream();
        //obj.accept(this);
        // write the stream content
        visitFromDictionary(obj);
        getStandardOutput().write(STREAM);
        getStandardOutput().writeCRLF();
        byte[] buffer = new byte[1024];
        int amountRead = 0;
        int totalAmountWritten = 0;
        while ((amountRead = input.read(buffer, 0, 1024)) != -1) {
            getStandardOutput().write(buffer, 0, amountRead);
            totalAmountWritten += amountRead;
        }
        // set the length as an indirect object
        if (lengthObject != null) {
            lengthObject.setObject(COSInteger.get(totalAmountWritten));
        }
        getStandardOutput().writeCRLF();
        getStandardOutput().write(ENDSTREAM);
        getStandardOutput().writeEOL();
        return null;
    } catch (Exception e) {
        throw new COSVisitorException(e);
    } finally {
        if (input != null) {
            try {
                input.close();
            } catch (IOException e) {
                throw new COSVisitorException(e);
            }
        }
    }
}

From source file:cz.muni.pdfjbim.PdfImageExtractor.java

License:Apache License

/**
 * @deprecated -- do not use doesn't work properly yet
 * This method extracts images by going through PDF tree structure
 * @param pdfFile name of input PDF file
 * @param prefix //from   w  w  w  . j  a  v  a  2s.c  o m
 * @param password password for access to PDF if needed
 * @param pagesToProcess list of pages which should be processed if null given => processed all pages
 *      -- not working yet
//    * @param silent -- if true error messages are not written to output otherwise they are
 * @param binarize -- enables processing of nonbitonal images as well (LZW is still not
 *      processed because of output with inverted colors)
 * @throws PdfRecompressionException if problem to extract images from PDF
 */
public void extractImagesUsingPdfObjectAccess(String pdfFile, String prefix, String password,
        Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException {
    if (binarize == null) {
        binarize = false;
    }
    // checking arguments and setting appropriate variables
    if (pdfFile == null) {
        throw new IllegalArgumentException("pdfFile must be defined");
    }

    InputStream inputStream = null;
    if (password != null) {
        try {
            log.debug("PDF probably encrypted, trying to decrypt using given password {}", password);
            ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream();
            PdfReader reader = new PdfReader(pdfFile, password.getBytes(StandardCharsets.UTF_8));
            PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream);
            stamper.close();
            inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray());
        } catch (DocumentException ex) {
            throw new PdfRecompressionException(ex);
        } catch (IOException ex) {
            throw new PdfRecompressionException("Reading file caused exception", ex);
        }
    } else {
        try {
            inputStream = new FileInputStream(pdfFile);
        } catch (FileNotFoundException ex) {
            throw new PdfRecompressionException("File wasn't found", ex);
        }
    }

    // if prefix is not set then prefix set to name of pdf without .pdf
    // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed
    // and this string set as prefix
    if ((prefix == null) && (pdfFile.length() > 4)) {
        prefix = pdfFile.substring(0, pdfFile.length() - 4);
    }

    PDFParser parser = null;
    PDDocument doc = null;
    try {
        parser = new PDFParser(inputStream);
        parser.parse();
        doc = parser.getPDDocument();

        AccessPermission accessPermissions = doc.getCurrentAccessPermission();

        if (!accessPermissions.canExtractContent()) {
            throw new PdfRecompressionException("Error: You do not have permission to extract images.");
        }

        // going page by page
        List pages = doc.getDocumentCatalog().getAllPages();
        for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) {
            if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) {
                continue;
            }
            PDPage page = (PDPage) pages.get(pageNumber);
            PDResources resources = page.getResources();
            Map xobjs = resources.getXObjects();

            if (xobjs != null) {
                Iterator xobjIter = xobjs.entrySet().iterator();
                while (xobjIter.hasNext()) {
                    Map.Entry entry = (Map.Entry) xobjIter.next();
                    String key = (String) entry.getKey();
                    PDXObject xobj = (PDXObject) entry.getValue();
                    Map images;
                    if (xobj instanceof PDXObjectForm) {
                        PDXObjectForm xform = (PDXObjectForm) xobj;
                        images = xform.getResources().getImages();
                    } else {
                        images = resources.getImages();
                    }

                    // reading images from each page and saving them to file
                    if (images != null) {
                        Iterator imageIter = images.entrySet().iterator();
                        while (imageIter.hasNext()) {
                            Map.Entry imEntry = (Map.Entry) imageIter.next();
                            String imKey = (String) imEntry.getKey();
                            PDXObjectImage image = (PDXObjectImage) imEntry.getValue();

                            PDStream pdStr = new PDStream(image.getCOSStream());
                            List<COSName> filters = pdStr.getFilters();

                            if (image.getBitsPerComponent() > 1 && !binarize) {
                                log.info("It is not a bitonal image => skipping");
                                continue;
                            }

                            // at this moment for preventing bad output (bad coloring) from LZWDecode filter
                            if (filters.contains(COSName.LZW_DECODE)) {
                                log.info("This is LZWDecoded => skipping");
                                continue;

                            }

                            if (filters.contains(COSName.JBIG2_DECODE)) {
                                if (skipJBig2Images) {
                                    log.warn("Allready compressed according to JBIG2 standard => skipping");
                                    continue;
                                } else {
                                    log.debug("JBIG2 image detected");
                                }
                            }

                            // detection of unsupported filters by pdfBox library
                            if (filters.contains(COSName.JPX_DECODE)) {
                                log.info("Unsupported filter JPXDecode => skipping");
                                continue;
                            }

                            COSObject cosObj = new COSObject(image.getCOSObject());
                            int objectNum = cosObj.getObjectNumber().intValue();
                            int genNum = cosObj.getGenerationNumber().intValue();
                            log.debug(objectNum + " " + genNum + " obj");

                            String name = getUniqueFileName(prefix + imKey, image.getSuffix());
                            log.debug("Writing image:" + name);
                            image.write2file(name);

                            PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(),
                                    image.getHeight(), objectNum, genNum);
                            originalImageInformations.add(pdfImageInfo);
                            log.debug(pdfImageInfo.toString());

                            namesOfImages.add(name + "." + image.getSuffix());
                        }
                    }
                }
            }
        }
    } catch (IOException ex) {
        Tools.deleteFilesFromList(namesOfImages);
        throw new PdfRecompressionException("Unable to parse PDF document", ex);
    } catch (RuntimeException ex) {
        Tools.deleteFilesFromList(namesOfImages);
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException ex) {
                throw new PdfRecompressionException(ex);
            }
        }
    }
}

From source file:cz.muni.pdfjbim.PdfImageProcessor.java

License:Apache License

/**
 * @deprecated -- do not use doesn't work properly yet
 * This method extracts images by going through PDF tree structure
 * @param pdfFile name of input PDF file
 * @param password password for access to PDF if needed
 * @param pagesToProcess list of pages which should be processed if null given => processed all pages
 *      -- not working yet//  w  ww.j  a v a  2  s.  co  m
 * @param silent -- if true error messages are not written to output otherwise they are
 * @param binarize -- enables processing of nonbitonal images as well (LZW is still not
 *      processed because of output with inverted colors)
 * @throws PdfRecompressionException if problem to extract images from PDF
 */
public void extractImagesUsingPdfObjectAccess(String pdfFile, String password, Set<Integer> pagesToProcess,
        Boolean silent, Boolean binarize) throws PdfRecompressionException {
    if (binarize == null) {
        binarize = false;
    }
    // checking arguments and setting appropriate variables
    if (pdfFile == null) {
        throw new IllegalArgumentException(pdfFile);
    }

    String prefix = null;

    InputStream inputStream = null;
    if (password != null) {
        try {
            ByteArrayOutputStream decryptedOutputStream = null;
            PdfReader reader = new PdfReader(pdfFile, password.getBytes());
            PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream);
            stamper.close();
            inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray());
        } catch (DocumentException ex) {
            throw new PdfRecompressionException(ex);
        } catch (IOException ex) {
            throw new PdfRecompressionException("Reading file caused exception", ex);
        }
    } else {
        try {
            inputStream = new FileInputStream(pdfFile);
        } catch (FileNotFoundException ex) {
            throw new PdfRecompressionException("File wasn't found", ex);
        }
    }

    // if prefix is not set then prefix set to name of pdf without .pdf
    // if pdfFile has unconsistent name (without suffix .pdf) and name longer than 4 chars then last for chars are removed
    // and this string set as prefix
    if ((prefix == null) && (pdfFile.length() > 4)) {
        prefix = pdfFile.substring(0, pdfFile.length() - 4);
    }

    PDFParser parser = null;
    PDDocument doc = null;
    try {
        parser = new PDFParser(inputStream);
        parser.parse();
        doc = parser.getPDDocument();

        AccessPermission accessPermissions = doc.getCurrentAccessPermission();

        if (!accessPermissions.canExtractContent()) {
            throw new PdfRecompressionException("Error: You do not have permission to extract images.");
        }

        // going page by page
        List pages = doc.getDocumentCatalog().getAllPages();
        for (int pageNumber = 0; pageNumber < pages.size(); pageNumber++) {
            if ((pagesToProcess != null) && (!pagesToProcess.contains(pageNumber + 1))) {
                continue;
            }
            PDPage page = (PDPage) pages.get(pageNumber);
            PDResources resources = page.getResources();
            Map xobjs = resources.getXObjects();

            if (xobjs != null) {
                Iterator xobjIter = xobjs.keySet().iterator();
                while (xobjIter.hasNext()) {
                    String key = (String) xobjIter.next();
                    PDXObject xobj = (PDXObject) xobjs.get(key);
                    Map images;
                    if (xobj instanceof PDXObjectForm) {
                        PDXObjectForm xform = (PDXObjectForm) xobj;
                        images = xform.getResources().getImages();
                    } else {
                        images = resources.getImages();
                    }

                    // reading images from each page and saving them to file
                    if (images != null) {
                        Iterator imageIter = images.keySet().iterator();
                        while (imageIter.hasNext()) {
                            String imKey = (String) imageIter.next();
                            PDXObjectImage image = (PDXObjectImage) images.get(imKey);

                            PDStream pdStr = new PDStream(image.getCOSStream());
                            List filters = pdStr.getFilters();

                            if (image.getBitsPerComponent() > 1) {
                                log.info("It is not a bitonal image => skipping");
                                continue;
                            }

                            // at this moment for preventing bad output (bad coloring) from LZWDecode filter
                            if (filters.contains(COSName.LZW_DECODE.getName())) {
                                log.info("This is LZWDecoded => skipping");
                                continue;

                            }

                            // detection of unsupported filters by pdfBox library
                            if (filters.contains("JBIG2Decode")) {
                                log.info("Allready compressed according to JBIG2 standard => skipping");
                                continue;
                            }
                            if (filters.contains("JPXDecode")) {
                                log.info("Unsupported filter JPXDecode => skipping");
                                continue;
                            }

                            COSObject cosObj = new COSObject(image.getCOSObject());
                            int objectNum = cosObj.getObjectNumber().intValue();
                            int genNum = cosObj.getGenerationNumber().intValue();
                            log.debug(objectNum + " " + genNum + " obj");

                            String name = getUniqueFileName(prefix + imKey, image.getSuffix());
                            log.debug("Writing image:" + name);
                            image.write2file(name);

                            PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(),
                                    image.getHeight(), objectNum, genNum);
                            originalImageInformations.add(pdfImageInfo);
                            log.debug(pdfImageInfo.toString());

                            namesOfImages.add(name + "." + image.getSuffix());
                        }
                    }

                }
            }

        }
    } catch (IOException ex) {
        throw new PdfRecompressionException("Unable to parse PDF document", ex);
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException ex) {
                throw new PdfRecompressionException(ex);
            }
        }
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

@Test
public void testIsInteger() {
    try {//from w ww  .ja v a2  s .com
        COSObject co = new COSObject(new COSInteger(10));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isInteger(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        doc.setXRef(new COSObjectKey(co), 1000);
        COSUtils.isInteger(co, doc);
        doc.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

@Test
public void testIsFloat() {
    try {/*from w w  w . j av  a 2s  .c o  m*/
        COSObject co = new COSObject(new COSFloat(10.0f));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isFloat(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        doc.setXRef(new COSObjectKey(co), 1000);
        COSUtils.isFloat(co, doc);
        doc.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

@Test
public void testIsString() {
    try {//  w w w  .  j a v a2  s  .co  m
        COSObject co = new COSObject(new COSString(""));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isString(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        doc.setXRef(new COSObjectKey(co), 1000);
        COSUtils.isString(co, doc);
        doc.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

@Test
public void testIsStream() {
    try {/*from   w  w  w.j  a  va  2s . co m*/
        COSObject co = new COSObject(new COSStream(null));
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isStream(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        doc.setXRef(new COSObjectKey(co), 1000);
        COSUtils.isStream(co, doc);
        doc.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

@Test
public void testIsDictionary() {
    try {// ww w  .  j  a  v  a2s .  c  o  m
        COSObject co = new COSObject(new COSDictionary());
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isDictionary(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        doc.setXRef(new COSObjectKey(co), 1000);
        COSUtils.isDictionary(co, doc);
        doc.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:net.padaf.preflight.utils.TestCOSUtils.java

License:Apache License

@Test
public void testIsArray() {
    try {//from   w  w  w.jav  a2s .  c o  m
        COSObject co = new COSObject(new COSArray());
        co.setGenerationNumber(new COSInteger(0));
        co.setObjectNumber(new COSInteger(10));

        assertFalse(COSUtils.isArray(co, new IOCOSDocument()));

        COSDocument doc = new COSDocument();
        doc.setXRef(new COSObjectKey(co), 1000);
        COSUtils.isArray(co, doc);
        doc.close();
    } catch (IOException e) {
        fail(e.getMessage());
    }
}

From source file:org.apache.fop.render.pdf.StructureTreeMergerTestCase.java

License:Apache License

@Test
public void testCheckNullCOSObject() throws IOException {
    setUp();//ww w .  java  2s  .c o m
    PDDocument doc = PDDocument.load(new File(getClass().getResource(BrokenLink).getFile()));
    PDPage srcPage = doc.getPage(0);
    PageParentTreeFinder finder = new PageParentTreeFinder(srcPage);
    COSArray markedContentParents = finder.getPageParentTreeArray(doc);
    COSObject nullObj = new COSObject(null);
    nullObj.setObjectNumber(100);
    nullObj.setGenerationNumber(0);
    PDFStructElem elem = new PDFStructElem();
    elem.setObjectNumber(2);
    COSObject parent = (COSObject) markedContentParents.get(1);
    COSArray kids = (COSArray) parent.getDictionaryObject(COSName.K);
    COSDictionary kid = (COSDictionary) kids.get(1);
    kid.setItem(COSName.OBJ, nullObj);
    adapter = new PDFBoxAdapter(pdfPage, new HashMap(), new HashMap<Integer, PDFArray>());
    PDFLogicalStructureHandler handler = setUpPDFLogicalStructureHandler();
    StructureTreeMerger merger = new StructureTreeMerger(elem, handler, adapter, srcPage);
    merger.copyStructure(markedContentParents);
    PDFArray array = handler.getPageParentTree();
    PDFStructElem parentElem = (PDFStructElem) array.get(1);
    PDFDictionary objrDict = (PDFDictionary) parentElem.getKids().get(1);
    Assert.assertNull(objrDict.get("Obj"));
}