Example usage for org.apache.pdfbox.cos COSObject getItem

List of usage examples for org.apache.pdfbox.cos COSObject getItem

Introduction

On this page you can find example usages of org.apache.pdfbox.cos COSObject getItem.

Prototype

public COSBase getItem(COSName key) 

Source Link

Document

This will get the dictionary object in this object that has the name key.

Usage

From source file:at.gv.egiz.pdfas.lib.impl.stamping.pdfbox.PDFBoxFont.java

License:EUPL

/**
 * Looks up a previously embedded TrueType font that matches the requested font.
 *
 * <p>The font cache of {@code pdfObject} is consulted first, keyed by
 * {@code fontInfo.fontPath}. On a miss, every object of type {@code Font} in the
 * document is inspected; the first TrueType font whose descriptor carries the same
 * font name and font family as {@code fontInfo} is wrapped in a {@link PDTrueTypeFont}
 * and returned.
 *
 * <p>Any exception raised during the lookup is logged and treated as "not found".
 *
 * @param pdfObject holder of the document and of the font cache
 * @param fontInfo  description (path, name, family) of the font being searched for
 * @return the matching font, or {@code null} if none was found or the lookup failed
 */
private PDFont findCachedFont(PDFBOXObject pdfObject, FontInfoCache fontInfo) {
    try {
        if (pdfObject.getFontCache().containsKey(fontInfo.fontPath)) {
            return pdfObject.getFontCache().get(fontInfo.fontPath);
        }

        List<COSObject> cosObjects = pdfObject.getDocument().getDocument().getObjectsByType(COSName.FONT);

        for (COSObject cosObject : cosObjects) {
            // Keep the static type as COSBase: casting before the instanceof check would
            // throw ClassCastException on a non-dictionary object and abort the whole search.
            COSBase baseObject = cosObject.getObject();
            if (!(baseObject instanceof COSDictionary)) {
                logger.debug("Font not a COSDictionary");
                continue;
            }
            COSDictionary fontDictionary = (COSDictionary) baseObject;

            COSBase subType = cosObject.getItem(COSName.SUBTYPE);
            COSBase descriptorBase = cosObject.getDictionaryObject(COSName.FONT_DESC);
            if (!(descriptorBase instanceof COSDictionary)) {
                // No usable font descriptor (missing or of an unexpected type): nothing to compare.
                continue;
            }
            COSDictionary fontDescriptor = (COSDictionary) descriptorBase;

            String fontName = fontDescriptor.getNameAsString(COSName.FONT_NAME);
            String fontFamily = fontDescriptor.getNameAsString(COSName.FONT_FAMILY);
            logger.trace("Inspecting Font {} - {}", fontFamily, fontName);

            if (!COSName.TRUE_TYPE.equals(subType)) {
                logger.debug("Font not a TTF");
                continue;
            }

            if (fontInfo.fontName != null && fontInfo.fontName.equals(fontName)
                    && fontInfo.fontFamily != null && fontInfo.fontFamily.equals(fontFamily)) {
                // Found it! :)
                logger.info("Found Font {}", fontInfo.fontName);
                return new PDTrueTypeFont(fontDictionary);
            }
        }
    } catch (Exception e) {
        // Best effort: a failed lookup only means the font is not reused.
        logger.info("Failed to load existing TTF fonts!", e);
    }
    return null;
}

From source file:cz.muni.pdfjbim.PdfImageExtractor.java

License:Apache License

/**
 * Extracts images by going through all COSObjects of type XObject found in the parsed
 * document.
 *
 * <p>For every image XObject that passes the filter checks the image is written to a
 * file, its name is appended to {@code namesOfImages}, and a {@code PdfImageInformation}
 * record is appended to {@code originalImageInformations}.
 *
 * <p>Images are skipped when they declare no filters, are not bitonal while
 * {@code binarize} is off, use LZWDecode or JPXDecode, or use JBIG2Decode while
 * {@code skipJBig2Images} is set.
 *
 * @param is input stream containing PDF file
 * @param prefix output basename for images
 * @param password password for access to PDF if needed; {@code null} means the stream is
 *      parsed as-is
 * @param pagesToProcess list of pages which should be processed if null given => processed all pages
 *      -- not working yet (the argument is currently not read)
 * @param binarize -- enables processing of nonbitonal images as well (LZW is still not
 *      processed because of output with inverted colors); {@code null} is treated as
 *      {@code false}
 * @throws PdfRecompressionException if problem to extract images from PDF
 */
public void extractImagesUsingPdfParser(InputStream is, String prefix, String password,
        Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException {
    // checking arguments and setting appropriate variables
    if (binarize == null) {
        binarize = false;
    }

    log.debug("Extracting images (binarize set to {})", binarize);

    InputStream inputStream = null;
    if (password != null) {
        // Decrypt into memory first so the parser below sees an unprotected document.
        try (ByteArrayOutputStream decryptedOutputStream = new ByteArrayOutputStream()) {
            PdfReader reader = new PdfReader(is, password.getBytes(StandardCharsets.UTF_8));
            PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream);
            // Closing the stamper flushes the decrypted document into the buffer.
            stamper.close();
            inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray());
        } catch (DocumentException ex) {
            throw new PdfRecompressionException(ex);
        } catch (IOException ex) {
            throw new PdfRecompressionException("Reading file caused exception", ex);
        }
    } else {
        inputStream = is;
    }

    COSDocument doc = null;
    try {
        PDFParser parser = new PDFParser(inputStream);
        parser.parse();
        doc = parser.getDocument();

        List<COSObject> objs = doc.getObjectsByType(COSName.XOBJECT);
        if (objs != null) {
            for (COSObject obj : objs) {
                COSBase subtype = obj.getItem(COSName.SUBTYPE);
                // An XObject without a Subtype entry cannot be an image; skipping it here
                // avoids a NullPointerException that would abort the whole extraction.
                if (subtype == null || !subtype.toString().equalsIgnoreCase("COSName{Image}")) {
                    continue;
                }

                COSBase imageObj = obj.getObject();
                COSBase cosNameObj = obj.getItem(COSName.NAME);
                String key;
                if (cosNameObj != null) {
                    // toString() yields "COSName{<name>}"; keep only the part inside the braces.
                    String cosNameKey = cosNameObj.toString();
                    int startOfKey = cosNameKey.indexOf("{") + 1;
                    key = cosNameKey.substring(startOfKey, cosNameKey.length() - 1);
                } else {
                    key = "im0";
                }
                int objectNum = obj.getObjectNumber().intValue();
                int genNum = obj.getGenerationNumber().intValue();
                PDXObjectImage image = (PDXObjectImage) PDXObjectImage.createXObject(imageObj);

                PDStream pdStr = new PDStream(image.getCOSStream());
                List<COSName> filters = pdStr.getFilters();

                log.debug("Detected image with color depth: {} bits", image.getBitsPerComponent());
                if (filters == null) {
                    continue;
                }
                log.debug("Detected filters: {}", filters.toString());

                if ((image.getBitsPerComponent() > 1) && (!binarize)) {
                    log.info("It is not a bitonal image => skipping");
                    continue;
                }

                // at this moment for preventing bad output (bad coloring) from LZWDecode filter
                if (filters.contains(COSName.LZW_DECODE)) {
                    log.info("This is LZWDecoded => skipping");
                    continue;
                }

                if (filters.contains(COSName.FLATE_DECODE)) {
                    log.debug("FlateDecoded image detected");
                }

                if (filters.contains(COSName.JBIG2_DECODE)) {
                    if (skipJBig2Images) {
                        log.warn("Allready compressed according to JBIG2 standard => skipping");
                        continue;
                    } else {
                        log.debug("JBIG2 image detected");
                    }
                }

                // detection of unsupported filters by pdfBox library
                if (filters.contains(COSName.JPX_DECODE)) {
                    log.warn("Unsupported filter JPXDecode => skipping");
                    continue;
                }

                String name = getUniqueFileName(prefix, image.getSuffix());
                log.info("Writing image: {}", name);
                image.write2file(name);

                PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(),
                        image.getHeight(), objectNum, genNum);
                originalImageInformations.add(pdfImageInfo);

                namesOfImages.add(name + "." + image.getSuffix());
            }
        }
    } catch (IOException ex) {
        Tools.deleteFilesFromList(namesOfImages);
        throw new PdfRecompressionException("Unable to parse PDF document", ex);
    } catch (Exception ex) {
        // Still handled as best effort (no rethrow), but no longer silent: without the
        // log entry a failed extraction is indistinguishable from a PDF without images.
        // NOTE(review): consider rethrowing as PdfRecompressionException - confirm with callers.
        log.error("Image extraction failed; images written so far were deleted", ex);
        Tools.deleteFilesFromList(namesOfImages);
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException ex) {
                throw new PdfRecompressionException(ex);
            }
        }
    }
}

From source file:cz.muni.pdfjbim.PdfImageProcessor.java

License:Apache License

/**
 * This method extracts images by going through all COSObjects pointed from xref table
 * @param is input stream containing PDF file
 * @param password password for access to PDF if needed
 * @param pagesToProcess list of pages which should be processed if null given => processed all pages
 *      -- not working yet/*from   w  w  w  .ja v a  2s .  co m*/
 * @param binarize -- enables processing of nonbitonal images as well (LZW is still not
 *      processed because of output with inverted colors)
 * @throws PdfRecompressionException if problem to extract images from PDF
 */
public void extractImagesUsingPdfParser(InputStream is, String prefix, String password,
        Set<Integer> pagesToProcess, Boolean binarize) throws PdfRecompressionException {
    // checking arguments and setting appropriate variables
    if (binarize == null) {
        binarize = false;
    }

    InputStream inputStream = null;
    if (password != null) {
        try {
            ByteArrayOutputStream decryptedOutputStream = null;
            PdfReader reader = new PdfReader(is, password.getBytes());
            PdfStamper stamper = new PdfStamper(reader, decryptedOutputStream);
            stamper.close();
            inputStream = new ByteArrayInputStream(decryptedOutputStream.toByteArray());
        } catch (DocumentException ex) {
            throw new PdfRecompressionException(ex);
        } catch (IOException ex) {
            throw new PdfRecompressionException("Reading file caused exception", ex);
        }
    } else {
        inputStream = is;
    }

    PDFParser parser = null;
    COSDocument doc = null;
    try {
        parser = new PDFParser(inputStream);
        parser.parse();
        doc = parser.getDocument();

        List<COSObject> objs = doc.getObjectsByType(COSName.XOBJECT);
        if (objs != null) {
            for (COSObject obj : objs) {
                COSBase subtype = obj.getItem(COSName.SUBTYPE);
                if (subtype.toString().equalsIgnoreCase("COSName{Image}")) {
                    COSBase imageObj = obj.getObject();
                    COSBase cosNameObj = obj.getItem(COSName.NAME);
                    String key;
                    if (cosNameObj != null) {
                        String cosNameKey = cosNameObj.toString();
                        int startOfKey = cosNameKey.indexOf("{") + 1;
                        key = cosNameKey.substring(startOfKey, cosNameKey.length() - 1);
                    } else {
                        key = "im0";
                    }
                    int objectNum = obj.getObjectNumber().intValue();
                    int genNum = obj.getGenerationNumber().intValue();
                    PDXObjectImage image = (PDXObjectImage) PDXObjectImage.createXObject(imageObj);

                    PDStream pdStr = new PDStream(image.getCOSStream());
                    List filters = pdStr.getFilters();

                    if ((image.getBitsPerComponent() > 1) && (!binarize)) {
                        log.info("It is not a bitonal image => skipping");

                        continue;
                    }

                    // at this moment for preventing bad output (bad coloring) from LZWDecode filter
                    if (filters.contains(COSName.LZW_DECODE.getName())) {
                        log.info("This is LZWDecoded => skipping");
                        continue;

                    }

                    // detection of unsupported filters by pdfBox library
                    if (filters.contains("JBIG2Decode")) {
                        log.warn("Allready compressed according to JBIG2 standard => skipping");
                        continue;
                    }

                    if (filters.contains("JPXDecode")) {
                        log.warn("Unsupported filter JPXDecode => skipping");
                        continue;
                    }

                    String name = getUniqueFileName(prefix, image.getSuffix());
                    log.info("Writing image:" + name);
                    image.write2file(name);

                    PdfImageInformation pdfImageInfo = new PdfImageInformation(key, image.getWidth(),
                            image.getHeight(), objectNum, genNum);
                    originalImageInformations.add(pdfImageInfo);

                    namesOfImages.add(name + "." + image.getSuffix());

                }
                //                    }
            }
        }
    } catch (IOException ex) {
        throw new PdfRecompressionException("Unable to parse PDF document", ex);
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException ex) {
                throw new PdfRecompressionException(ex);
            }
        }
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMerger.java

License:Apache License

/**
 * Recursively descends from {@code base} and attaches every object reference found on
 * the way to {@code parent} as a kid.
 *
 * <ul>
 *   <li>A dictionary is followed through its {@code K} entry, if it has one.</li>
 *   <li>An array is processed element by element, in order.</li>
 *   <li>Anything else is expected to be a {@link COSObject}: a structure element is
 *       created and registered for it, linked to {@code parent}, and its own {@code K}
 *       entry (if present) is processed with the new element as parent.</li>
 * </ul>
 *
 * @param base   the node to descend from
 * @param parent the element that receives the descendants found below {@code base}
 * @throws IOException propagated from the helpers that create and copy the elements
 */
public void createDirectDescendants(COSBase base, PDFStructElem parent) throws IOException {
    if (base instanceof COSDictionary) {
        COSDictionary dictionary = (COSDictionary) base;
        boolean hasKidsEntry = dictionary.keySet().contains(COSName.K);
        if (hasKidsEntry) {
            createDirectDescendants(dictionary.getItem(COSName.K), parent);
        }
        return;
    }

    if (base instanceof COSArray) {
        COSArray entries = (COSArray) base;
        int index = 0;
        while (index < entries.size()) {
            createDirectDescendants(entries.get(index), parent);
            index++;
        }
        return;
    }

    assert base instanceof COSObject;
    COSObject reference = (COSObject) base;
    createAndRegisterStructElem(reference);
    // The element registered just above is looked up again by its object number.
    PDFStructElem structElem = structElemCache.get((int) reference.getObjectNumber());
    copyElemEntries(reference, structElem);
    parent.addKid(structElem);
    structElem.setParent(parent);

    COSBase nestedKids = reference.getItem(COSName.K);
    if (nestedKids != null) {
        createDirectDescendants(nestedKids, structElem);
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMerger.java

License:Apache License

/**
 * Runs the three-argument {@code createParents} for every entry of the given array.
 *
 * <p>Each entry is treated as a {@link COSObject}; its {@code P} entry is passed as the
 * parent object and the cached structure element is looked up by the entry's object number.
 *
 * @param markedContentParents array whose entries are processed in iteration order
 * @throws IOException propagated from the per-entry {@code createParents} call
 */
private void createParents(COSArray markedContentParents) throws IOException {
    for (COSBase item : markedContentParents) {
        COSObject child = (COSObject) item;
        COSObject parentOfChild = (COSObject) child.getItem(COSName.P);
        int objectNumber = (int) child.getObjectNumber();
        createParents(child, parentOfChild, structElemCache.get(objectNumber));
    }
}

From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMergerUtil.java

License:Apache License

/**
 * Determines the index of {@code kid} within the {@code K} entry of its parent.
 *
 * <p>The parent is read from the {@code P} entry of {@code kid}.
 *
 * @param kid the object whose position is wanted
 * @return the result of {@code indexOfObject(kid)} when the parent's {@code K} entry is
 *         an array; {@code 0} when it is anything else
 */
public static int findObjectPositionInKidsArray(COSObject kid) {
    COSObject parentObject = (COSObject) kid.getItem(COSName.P);
    COSBase siblings = parentObject.getItem(COSName.K);
    if (!(siblings instanceof COSArray)) {
        return 0;
    }
    return ((COSArray) siblings).indexOfObject(kid);
}