Example usage for org.apache.pdfbox.cos COSObject getObject

Introduction

In this page you can find the example usage for org.apache.pdfbox.cos COSObject getObject.

Prototype

public COSBase getObject()

Source Link

Document

This will get the object that this object encapsulates.

Usage

From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMerger.java

License:Apache License

/**
 * Starts the leaf search for one entry, provided the entry is an indirect object.
 * The encapsulated object is cast to a dictionary and its {@code K} item is handed to
 * {@link #findLeafKids(COSBase, COSObject)} with the entry itself as parent.
 * Entries that are not {@link COSObject} instances are ignored.
 *
 * @param entry the candidate entry
 * @throws IOException propagated from {@code findLeafKids}
 */
private void findLeafNodesInPageFromStructElemObjects(COSBase entry) throws IOException {
    if (!(entry instanceof COSObject)) {
        return;
    }
    COSObject structElemRef = (COSObject) entry;
    COSDictionary structElem = (COSDictionary) structElemRef.getObject();
    findLeafKids(structElem.getItem(COSName.K), structElemRef);
}

From source file:org.apache.fop.render.pdf.pdfbox.StructureTreeMerger.java

License:Apache License

/**
 * Walks a kid entry and, for leaf kids, creates the structure element of their parent if it is not
 * in {@code structElemCache} yet.
 * <ul>
 * <li>array: every entry is visited with the same parent;</li>
 * <li>indirect object: its dictionary is visited; when the dictionary has both an {@code S} and a
 * {@code K} entry, the {@code K} item is visited with the indirect object as the new parent;</li>
 * <li>dictionary: handled only if {@code isElementFromSourcePage} accepts it;</li>
 * <li>anything else (asserted to be a {@link COSInteger}): handled only if
 * {@code checkPageEntryInAncestorsRecursively} accepts the parent dictionary.</li>
 * </ul>
 *
 * @param kid the kid entry to inspect
 * @param parent the indirect object owning {@code kid}
 * @throws IOException propagated from the helpers called here
 */
private void findLeafKids(COSBase kid, COSObject parent) throws IOException {
    if (kid instanceof COSArray) {
        for (COSBase element : (COSArray) kid) {
            findLeafKids(element, parent);
        }
        return;
    }

    if (kid instanceof COSObject) {
        COSObject kidRef = (COSObject) kid;
        COSDictionary kidDict = (COSDictionary) kidRef.getObject();
        // K is looked up only once S is known to be present, as in the short-circuit condition.
        boolean hasStructType = kidDict.getDictionaryObject(COSName.S) != null;
        COSBase nestedKids = hasStructType ? kidDict.getItem(COSName.K) : null;
        if (nestedKids != null) {
            findLeafKids(nestedKids, kidRef);
        } else {
            findLeafKids(kidDict, parent);
        }
        return;
    }

    if (kid instanceof COSDictionary) {
        COSDictionary ownerDict = (COSDictionary) parent.getObject();
        if (!isElementFromSourcePage((COSDictionary) kid, ownerDict)) {
            return;
        }
        if (structElemCache.get((int) parent.getObjectNumber()) != null) {
            return;
        }
        PDFStructElem created = createAndRegisterStructElem(parent);
        copyElemEntries(parent, created);
        extra.add(parent);
        createKids(kid, ownerDict, created, false);
        return;
    }

    assert kid instanceof COSInteger;
    COSDictionary ownerDict = (COSDictionary) parent.getObject();
    if (!checkPageEntryInAncestorsRecursively(ownerDict)) {
        return;
    }
    if (structElemCache.get((int) parent.getObjectNumber()) != null) {
        return;
    }
    PDFStructElem created = createAndRegisterStructElem(parent);
    copyElemEntries(parent, created);
    createKids(kid, ownerDict, created, false);
}

From source file:org.apache.padaf.preflight.ExtractStream.java

License:Apache License

/**
 * Writes the unfiltered content of one indirect object of a PDF file to the file "stream.out".
 * Nothing is written when the addressed object does not encapsulate a {@link COSStream}.
 *
 * @param args exactly three values: the PDF file path, the object number and the generation number
 * @throws Exception if the file cannot be loaded, the numbers cannot be parsed or the copy fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("usage : ExtractStream file objNum objGen");
        // Stop here: going on would index args[] entries that may not exist.
        return;
    }

    FileInputStream input = new FileInputStream(args[0]);
    PDDocument document = null;
    try {
        document = PDDocument.load(input);
        COSObject obj = document.getDocument()
                .getObjectFromPool(new COSObjectKey(Integer.parseInt(args[1]), Integer.parseInt(args[2])));
        if (obj.getObject() instanceof COSStream) {
            COSStream stream = (COSStream) obj.getObject();
            InputStream is = null;
            FileOutputStream out = null;
            try {
                is = stream.getUnfilteredStream();
                out = new FileOutputStream("stream.out");
                IOUtils.copyLarge(is, out);
            } finally {
                // Both streams are released even when the copy fails.
                IOUtils.closeQuietly(out);
                IOUtils.closeQuietly(is);
            }
        }
    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            IOUtils.closeQuietly(input);
        }
    }
}

From source file:org.apache.padaf.preflight.helpers.StreamValidationHelper.java

License:Apache License

/**
 * Checks that the length declared in a stream dictionary matches the bytes found in the source file.
 * The source is read from the object's xref offset up to the stream keyword, the declared number of
 * bytes is consumed, and the following bytes must be an end of line followed by "endstream".
 * Any mismatch adds an {@code ERROR_SYNTAX_STREAM_LENGTH_INVALID} error to {@code result}.
 * Nothing is checked when the object has no entry in the xref table.
 *
 * @param handler gives access to the parsed document and to the raw source
 * @param cObj the indirect object encapsulating the stream to check
 * @param result the list receiving validation errors
 * @throws ValidationException if the source cannot be read or positioned on the object
 */
protected void checkStreamLength(DocumentHandler handler, COSObject cObj, List<ValidationError> result)
        throws ValidationException {
    COSStream streamObj = (COSStream) cObj.getObject();
    int length = streamObj.getInt(COSName.getPDFName(STREAM_DICTIONARY_KEY_LENGHT));
    InputStream ra = null;
    try {
        ra = handler.getSource().getInputStream();
        Long offset = handler.getDocument().getDocument().getXrefTable().get(new COSObjectKey(cObj));

        if (offset == null) {
            /*
             * Offset is null. The stream isn't used, check is useless.
             *
             * TODO : Is it the truth?
             */
            return;
        }

        // ---- go to the beginning of the object
        long skipped = 0;
        while (skipped != offset) {
            long curSkip = ra.skip(offset - skipped);
            if (curSkip < 0) {
                throw new ValidationException("Unable to skip bytes in the PDFFile to check stream length");
            }
            if (curSkip == 0) {
                // skip() may return 0 at end of file; probe with read() so the loop cannot spin forever.
                if (ra.read() == -1) {
                    throw new ValidationException("Unable to skip bytes in the PDFFile to check stream length");
                }
                curSkip = 1;
            }
            skipped += curSkip;
        }

        // ---- go to the stream key word
        if (!readUntilStream(ra)) {
            result.add(new ValidationResult.ValidationError(
                    ValidationConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID, "Stream length is invalide"));
            return;
        }

        int c = ra.read();
        if (c == '\r') {
            ra.read();
        } // else c is '\n' no more character to read

        // ---- Here is the true beginning of the Stream Content.
        // ---- Read the given length of bytes and check the 11 next bytes
        // ---- (2 possible end-of-line bytes + "endstream") to see if there are endstream.
        int nbBytesToRead = length;
        if (nbBytesToRead < 0) {
            // A negative length cannot be read (presumably what getInt yields for a missing or
            // non-numeric entry - to be confirmed); report it instead of letting read() throw.
            result.add(new ValidationResult.ValidationError(
                    ValidationConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID, "Stream length is invalide"));
            return;
        }

        byte[] buffer = new byte[1024];
        while (nbBytesToRead > 0) {
            int cr = ra.read(buffer, 0, Math.min(nbBytesToRead, buffer.length));
            if (cr == -1) {
                // End of file reached before the declared length was consumed.
                result.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID, "Stream length is invalide"));
                return;
            }
            nbBytesToRead -= cr;
        }

        int len = "endstream".length() + 2;
        byte[] buffer2 = new byte[len];
        for (int i = 0; i < len; ++i) {
            buffer2[i] = (byte) ra.read();
        }

        // ---- check the content of the bytes following the stream data.
        // ISO-8859-1 maps each byte to one char, so the test does not depend on the platform charset.
        String endStream = new String(buffer2, "ISO-8859-1");
        boolean startsWithEol = (buffer2[0] == '\r' && (buffer2[1] == '\n' || buffer2[1] == 'e'))
                || (buffer2[0] == '\n' && buffer2[1] == 'e');
        if (!startsWithEol || !endStream.contains("endstream")) {
            result.add(new ValidationResult.ValidationError(
                    ValidationConstants.ERROR_SYNTAX_STREAM_LENGTH_INVALID, "Stream length is invalide"));
        }
    } catch (IOException e) {
        throw new ValidationException("Unable to read a stream to validate it due to : " + e.getMessage(), e);
    } finally {
        IOUtils.closeQuietly(ra);
    }
}

From source file:org.apache.padaf.preflight.helpers.TrailerValidationHelper.java

License:Apache License

/**
 * Accesses and compares First and Last trailers for a PDF version higher than 1.4.
 * /*from   w w  w .jav  a 2 s.  c  om*/
 * @param handler
 * @param result
 */
protected void checkTrailersForLinearizedPDF15(DocumentHandler handler, List<ValidationError> result) {
    PDDocument pdfDoc = handler.getDocument();
    try {
        COSDocument cosDocument = pdfDoc.getDocument();
        List<COSObject> xrefs = cosDocument.getObjectsByType(COSName.XREF);

        if (xrefs.isEmpty()) {
            // no XRef CosObject, may by this pdf file used the PDF 1.4 syntaxe
            checkTrailersForLinearizedPDF14(handler, result);

        } else {

            long min = Long.MAX_VALUE;
            long max = Long.MIN_VALUE;
            COSDictionary firstTrailer = null;
            COSDictionary lastTrailer = null;

            // Search First and Last trailers according to offset position.
            for (COSObject co : xrefs) {
                long offset = cosDocument.getXrefTable().get(new COSObjectKey(co));
                if (offset < min) {
                    min = offset;
                    firstTrailer = (COSDictionary) co.getObject();
                }

                if (offset > max) {
                    max = offset;
                    lastTrailer = (COSDictionary) co.getObject();
                }

            }

            checkMainTrailer(pdfDoc.getDocument(), firstTrailer, result);
            if (!compareIds(firstTrailer, lastTrailer, pdfDoc.getDocument())) {
                result.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                        "ID is different in the first and the last trailer"));
            }
        }
    } catch (IOException e) {
        result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                "Unable to check PDF Trailers due to : " + e.getMessage()));
    }
}

From source file:org.apache.padaf.preflight.RetrieveMissingStream.java

License:Apache License

/**
 * Loads a PDF file, collects the keys of all objects that encapsulate a {@link COSStream} and then
 * walks the pages of the document, printing one empty line per page.
 *
 * @param args exactly one value: the path of the PDF file
 * @throws Exception if the file cannot be loaded or read
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage : RetrieveMissingStream file");
        System.exit(233);
    }

    HashSet<COSObjectKey> listOfKeys = new HashSet<COSObjectKey>();

    FileInputStream input = new FileInputStream(args[0]);
    PDDocument document = null;
    try {
        document = PDDocument.load(input);
        List<COSObject> lCosObj = document.getDocument().getObjects();
        for (COSObject cosObject : lCosObj) {

            if (cosObject.getObject() instanceof COSStream) {
                listOfKeys.add(new COSObjectKey(cosObject.getObjectNumber().intValue(),
                        cosObject.getGenerationNumber().intValue()));
            }

        }

        PDDocumentCatalog catalog = document.getDocumentCatalog();
        List<?> pages = catalog.getAllPages();
        for (int i = 0; i < pages.size(); ++i) {
            PDPage pdp = (PDPage) pages.get(i);
            // NOTE(review): pdStream, b and listOfKeys are never read afterwards; the comparison
            // this tool is named after looks unfinished.
            PDStream pdStream = pdp.getContents();

            COSBase b = pdp.getCOSDictionary().getItem(COSName.getPDFName("Contents"));
            System.out.println();
        }
    } finally {
        // Release the document and the underlying file even when reading fails.
        try {
            if (document != null) {
                document.close();
            }
        } finally {
            input.close();
        }
    }
}

From source file:org.crossref.pdfmark.Main.java

License:Open Source License

/**
 * According to the PDF Reference Manual (appendix F) a linearized PDF
 * must have as its first object after the PDF header an indirect
 * dictionary containing only direct objects. Among these objects one
 * must be assigned the key "Linearized", representing the linearized PDF
 * version number.
 * <p>
 * This implementation parses the whole stream and reports true as soon as any
 * object encapsulating a dictionary has a key named "Linearized"; it does not
 * check that the dictionary is the first object. The parsed document is closed
 * before returning, including when the scan fails.
 *
 * @param in the stream the PDF is parsed from
 * @return true if the PDF read from {@code in} is a linearized PDF.
 * @throws IOException if the stream cannot be parsed or the document cannot be closed
 */
public static boolean isLinearizedPdf(FileInputStream in) throws IOException {
    PDFParser parser = new PDFParser(in);
    parser.parse();
    COSDocument doc = parser.getDocument();

    try {
        for (Object o : doc.getObjects()) {
            COSObject obj = (COSObject) o;
            if (!(obj.getObject() instanceof COSDictionary)) {
                continue;
            }
            COSDictionary dict = (COSDictionary) obj.getObject();
            for (Object key : dict.keyList()) {
                COSName name = (COSName) key;
                if ("Linearized".equals(name.getName())) {
                    return true;
                }
            }
        }
        return false;
    } finally {
        doc.close();
    }
}

From source file:org.lockss.pdf.pdfbox.PdfBoxTokens.java

License:Open Source License

/**
 * <p>/*from w ww  .jav  a  2s. com*/
 * Converts from a {@link COSObject} to a PDF token value.
 * </p>
 * @param cosObject A {@link COSObject} instance.
 * @return A PDFToken value.
 * @since 1.56.3
 */
protected static PdfToken getObject(COSObject cosObject) {
    return convertOne(cosObject.getObject());
}

From source file:serock.pdfpagerestorer.PdfPageRestorer.java

License:Open Source License

/**
 * Adds to {@code pdDoc} one {@link PDPage} for every object that {@code cosDoc} returns for
 * {@link COSName#PAGE}. Each encapsulated object is cast to a {@link COSDictionary} and wrapped
 * in a new page.
 *
 * @param pdDoc the document receiving the pages
 * @param cosDoc the low-level document searched for page objects
 * @throws IOException propagated from the object lookup
 */
private static void addPages(final PDDocument pdDoc, final COSDocument cosDoc) throws IOException {
    for (final COSObject pageRef : cosDoc.getObjectsByType(COSName.PAGE)) {
        final COSDictionary pageDict = (COSDictionary) pageRef.getObject();
        pdDoc.addPage(new PDPage(pageDict));
    }
}

From source file:uk.ac.liverpool.thumbnails.PDFService.java

License:Open Source License

/**
 * Builds the list of fonts declared in the document obtained from {@code getPages(u, fff)}.
 * Every object of type {@code Font} in the document is visited, so fonts that no page uses are
 * reported too. Fonts without a {@code BaseFont} name, or whose name is found in
 * {@code standard14}, are skipped. After collection, an entry is dropped when it has the same
 * name as the previous entry (in set order) and that previous entry is a CID font.
 *
 * @param u passed through to {@code getPages}
 * @param fff passed through to {@code getPages}
 * @return the collected font descriptions, in the order of the sorted set
 * @throws MalformedURLException declared for callers; presumably raised by {@code getPages}
 * @throws IOException if the document cannot be read
 */
@Override
public FontInformation[] extractFontList(URI u, File fff) throws MalformedURLException, IOException {

    SortedSet<FontInformation> ret = new TreeSet<FontInformation>();
    // NOTE(review): the document is not closed here; whether getPages keeps ownership of it is
    // not visible from this method.
    PDDocument document = getPages(u, fff);

    for (COSObject c : document.getDocument().getObjectsByType(COSName.FONT)) {
        if (c == null || !(c.getObject() instanceof COSDictionary)) {
            continue;
        }
        COSDictionary fontDictionary = (COSDictionary) c.getObject();

        // Type must be Font; constant-first equals tolerates a missing Type entry.
        if (!"Font".equals(fontDictionary.getNameAsString(COSName.TYPE))) {
            continue;
        }

        FontInformation fi = new FontInformation();
        fi.fontType = fontDictionary.getNameAsString(COSName.SUBTYPE);

        String baseFont = fontDictionary.getNameAsString(COSName.BASE_FONT);
        if (baseFont == null) {
            continue;
        }
        // Assumes standard14 is sorted, as binarySearch requires.
        if (Arrays.binarySearch(standard14, baseFont) >= 0) {
            continue;
        }

        COSDictionary fontDescriptor = (COSDictionary) fontDictionary.getDictionaryObject(COSName.FONT_DESC);
        COSBase enc = fontDictionary.getItem(COSName.ENCODING);
        COSBase uni = fontDictionary.getItem(COSName.TO_UNICODE);
        boolean toUnicode = uni != null;

        // The three cases are exclusive: without the else-if, a missing encoding was
        // overwritten with "table".
        String encoding;
        if (enc == null) {
            encoding = "standard14";
        } else if (enc instanceof COSString) {
            encoding = ((COSString) enc).getString();
        } else {
            // NOTE(review): encodings given as names also end up here - confirm this is intended.
            encoding = "table";
        }

        // A subset font name is six upper-case letters, then '+', then the real name
        // (see 5.5.3 pdfref 1.6).
        boolean subsetPrefix = baseFont.length() > 6 && baseFont.charAt(6) == '+';
        for (int k = 0; subsetPrefix && k < 6; k++) {
            subsetPrefix = Character.isUpperCase(baseFont.charAt(k));
        }
        fi.isSubset = subsetPrefix;
        if (fi.isSubset) {
            baseFont = baseFont.substring(7);
        }

        fi.fontFlags = 0;
        // fontType is a String, so it must be compared with the names of the COSName constants;
        // comparing with the COSName objects themselves never matched.
        if (COSName.TYPE0.getName().equals(fi.fontType) || COSName.TYPE3.getName().equals(fi.fontType)) {
            fi.isEmbedded = true;
        }

        if (fontDescriptor != null) {
            // in Type1 charset indicates font is subsetted
            if (fontDescriptor.getItem(COSName.CHAR_SET) != null) {
                fi.isSubset = true;
            }
            if (fontDescriptor.getItem(COSName.FONT_FILE) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE3) != null
                    || fontDescriptor.getItem(COSName.FONT_FILE2) != null) {
                fi.isEmbedded = true;
            }
            fi.fontFlags = fontDescriptor.getInt(COSName.getPDFName("Flags"));
            fi.fontFamily = fontDescriptor.getString(COSName.FONT_FAMILY);
            fi.fontStretch = fontDescriptor.getString(COSName.FONT_STRETCH);
        }
        fi.charset = encoding;
        fi.fontName = baseFont;
        fi.isToUnicode = toUnicode;

        ret.add(fi);
    } // for all fonts

    // Drop an entry that repeats the name of the CID font preceding it in set order.
    FontInformation prev = null;
    LinkedList<FontInformation> toDelete = new LinkedList<FontInformation>();
    for (FontInformation current : ret) {
        if (prev != null && prev.fontName.equals(current.fontName) && prev.fontType != null
                && prev.fontType.startsWith("CIDFontType")) {
            toDelete.add(current);
        }
        prev = current;
    }
    ret.removeAll(toDelete);

    return ret.toArray(new FontInformation[0]);
}