Example usage for org.apache.pdfbox.pdmodel PDDocument getDocument

List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocument

Introduction

On this page you can find example usages of org.apache.pdfbox.pdmodel PDDocument getDocument.

Prototype

public COSDocument getDocument() 

Source Link

Document

This will get the low level document.

Usage

From source file:net.padaf.preflight.helpers.TrailerValidationHelper.java

License:Apache License

/**
 * Validates the trailer(s) of the PDF held by the given handler.
 * <p>
 * For a linearized PDF the linearized dictionary is checked, then the first
 * trailer is validated and its ID is compared with the ID of the last
 * trailer. For any other PDF only the last trailer is validated.
 * <p>
 * Parsing problems are reported as validation errors in the returned list;
 * this method contains no throw statement of its own.
 *
 * @param handler gives access to the parsed document and to the raw trailers
 *                extracted from the file
 * @return the validation errors found; empty when none was found
 * @throws ValidationException declared by the overridden method
 */
@Override
public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException {

    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdfDoc = handler.getDocument();

    COSDictionary linearizedDict = isLinearizedPdf(pdfDoc);
    if (linearizedDict != null) {
        // it is a linearized PDF, check the linearized dictionary
        checkLinearizedDictionnary(linearizedDict, result);

        // if the pdf is a linearized pdf. the first trailer must be checked
        // and it must have the same ID than the last trailer.
        List<String> lTrailers = handler.getPdfExtractor().getAllTrailers();
        if (lTrailers == null || lTrailers.isEmpty()) {
            // Without any extracted trailer there is nothing to parse: report it
            // instead of failing with an IndexOutOfBoundsException.
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "Unable to parse trailers of the linearized PDF"));
            return result;
        }
        String firstTrailer = lTrailers.get(0);
        String lastTrailer = lTrailers.get(lTrailers.size() - 1);

        COSDictionary first = null;
        COSDictionary last = null;
        COSDocument cd = null;
        try {
            // Scratch document used only as a container for the parsed dictionaries
            cd = new COSDocument();
            PdfElementParser parser1 = new PdfElementParser(cd, firstTrailer.getBytes());
            first = parser1.parseAsDictionary();
            PdfElementParser parser2 = new PdfElementParser(cd, lastTrailer.getBytes());
            last = parser2.parseAsDictionary();

            checkMainTrailer(pdfDoc.getDocument(), first, result);
            if (!compareIds(first, last, pdfDoc.getDocument())) {
                result.add(new ValidationResult.ValidationError(
                        ValidationConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY,
                        "ID is different in the first and the last trailer"));
            }

        } catch (IOException e) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "Unable to parse trailers of the linearized PDF"));
        } finally {
            COSUtils.closeDocumentQuietly(cd);
        }

    } else {
        // If the PDF isn't a linearized one, only the last trailer must be
        // checked
        List<String> lTrailers = handler.getPdfExtractor().getAllTrailers();
        if (lTrailers == null || lTrailers.isEmpty()) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "The trailer dictionary is missing"));
            return result;
        }
        String lastTrailer = lTrailers.get(lTrailers.size() - 1);

        COSDocument cd = null;
        try {
            cd = new COSDocument();
            PdfElementParser parser = new PdfElementParser(cd, lastTrailer.getBytes());
            COSDictionary trailer = parser.parseAsDictionary();
            checkMainTrailer(pdfDoc.getDocument(), trailer, result);
        } catch (IOException e) {
            result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER,
                    "The trailer dictionary is missing"));
        } finally {
            // Same quiet close as the linearized branch. The previous direct
            // cd.close() dereferenced cd even when the constructor had failed.
            COSUtils.closeDocumentQuietly(cd);
        }

    }
    return result;
}

From source file:net.padaf.preflight.helpers.TrailerValidationHelper.java

License:Apache License

/**
 * According to the PDF Reference, A linearized PDF contain a dictionary as
 * first object (linearized dictionary) and only this one in the first
 * section./*from w w  w . j a v a 2  s  .  c  om*/
 * 
 * @param document
 * @return
 */
protected COSDictionary isLinearizedPdf(PDDocument document) {
    // ---- Get Ref to obj
    COSDocument cDoc = document.getDocument();
    List<?> lObj = cDoc.getObjects();
    for (Object object : lObj) {
        COSBase curObj = ((COSObject) object).getObject();
        if (curObj instanceof COSDictionary
                && ((COSDictionary) curObj).keySet().contains(COSName.getPDFName(DICTIONARY_KEY_LINEARIZED))) {
            return (COSDictionary) curObj;
        }
    }
    return null;
}

From source file:net.padaf.preflight.utils.COSUtils.java

License:Apache License

/**
 * Close the given Document. If the close method of the document throws an
 * exception, it is logged using a log4j logger (Level : WARN)
 * /*from w  ww. j a v  a2s .c  o m*/
 * @param document
 */
public static void closeDocumentQuietly(PDDocument document) {
    if (document != null) {
        closeDocumentQuietly(document.getDocument());
    }
}

From source file:no.digipost.print.validate.PdfValidator.java

License:Apache License

/**
 * Leser hele dokumentet inn i minnet/*from  www.java2 s.  co m*/
 */
private List<PdfValidationError> validerDokumentForPrint(final PDDocument pdDoc,
        final PdfValidationSettings innstillinger) throws IOException {
    List<PdfValidationError> errors = new ArrayList<>();

    if (pdDoc.isEncrypted()) {
        return failValidationIfEncrypted(errors);
    }

    if (innstillinger.validerSideantall) {
        validerSideantall(pdDoc.getNumberOfPages(), errors);
    }

    if (innstillinger.validerPDFversjon) {
        validerPdfVersjon(pdDoc.getDocument().getVersion(), errors);
    }

    boolean dokumentHarUgyldigeDimensjoner = false;
    for (PDPage page : getAllPagesFrom(pdDoc)) {
        if (harUgyldigeDimensjoner(page)) {
            dokumentHarUgyldigeDimensjoner = true;
            break;
        }
    }

    leggTilValideringsfeil(dokumentHarUgyldigeDimensjoner, UNSUPPORTED_DIMENSIONS, errors);

    boolean harTekstIStrekkodeomraade = false;
    boolean dokumentHarSiderHvisMarginIkkeLarSegVerifisereForPrint = false;
    if (innstillinger.validerVenstremarg) {
        for (PDPage page : getAllPagesFrom(pdDoc)) {
            try {
                if (harTekstIStrekkodeomraade(page)) {
                    harTekstIStrekkodeomraade = true;
                    break;
                }
            } catch (NullPointerException npe) {
                dokumentHarSiderHvisMarginIkkeLarSegVerifisereForPrint = true;
                LOG.info("Klarte ikke  verifiserere margen p en side");
            }
        }
    }

    leggTilValideringsfeil(dokumentHarSiderHvisMarginIkkeLarSegVerifisereForPrint,
            UNABLE_TO_VERIFY_SUITABLE_MARGIN_FOR_PRINT, errors);
    leggTilValideringsfeil(harTekstIStrekkodeomraade, INSUFFICIENT_MARGIN_FOR_PRINT, errors);

    if (innstillinger.validerFonter) {
        for (PDPage page : getAllPagesFrom(pdDoc)) {
            validerFonter(fontValidator.getPageFonts(page), errors);
        }
    }

    return errors;
}

From source file:org.apache.fop.render.pdf.pdfbox.AbstractPDFBoxHandler.java

License:Apache License

/**
 * Converts the selected page of a PDF image into a content stream for the
 * target page.
 * <p>
 * The page index is taken from the image's original URI. When the source
 * PDF has a higher version than the target document, the target version is
 * raised; if that is refused with an IllegalStateException a version
 * mismatch event is sent instead. An encrypted source PDF is reported
 * through the event producer and not processed. Events are also sent when
 * the PDF/A or PDF/X profile is active on the target document.
 *
 * @param image the PDF image; supplies the source document and original URI
 * @param targetPage the page receiving the content
 * @param userAgent the user agent; may be null as far as the event
 *        broadcaster and the accessibility check are concerned
 *        (NOTE(review): it is also handed to getObjectCache, whose null
 *        handling is not visible here)
 * @param at transform passed through to the PDFBox adapter
 * @param fontinfo font information passed through to the PDFBox adapter
 * @param pos position rectangle passed through to the PDFBox adapter
 * @param pageNumbers page number map passed to the PDFBox adapter
 * @param handler logical structure handler, may be null; when present its
 *        page parent tree length seeds the adapter's current MCID
 * @param curentSessionElem structure element passed to the tagged PDF
 *        conductor when accessibility is enabled
 * @return the stream created by the adapter, or null when the source PDF is
 *         encrypted
 * @throws IOException declared by this method; presumably raised by the
 *         adapter or conductor calls
 */
protected String createStreamForPDF(ImagePDF image, PDFPage targetPage, FOUserAgent userAgent,
        AffineTransform at, FontInfo fontinfo, Rectangle pos, Map<Integer, PDFArray> pageNumbers,
        PDFLogicalStructureHandler handler, PDFStructElem curentSessionElem) throws IOException {

    EventBroadcaster eventBroadcaster = null;
    if (userAgent != null) {
        eventBroadcaster = userAgent.getEventBroadcaster();
    }
    String originalImageUri = image.getInfo().getOriginalURI();
    final int selectedPage = ImageUtil.needPageIndexFromURI(originalImageUri);

    PDDocument pddoc = image.getPDDocument();
    float pdfVersion = pddoc.getDocument().getVersion();
    Version inputDocVersion = Version.getValueOf(String.valueOf(pdfVersion));
    PDFDocument pdfDoc = targetPage.getDocument();

    if (pdfDoc.getPDFVersion().compareTo(inputDocVersion) < 0) {
        try {
            pdfDoc.setPDFVersion(inputDocVersion);
        } catch (IllegalStateException e) {
            // The target version could not be raised: report the mismatch and go on
            getEventProducer(eventBroadcaster).pdfVersionMismatch(this, pdfDoc.getPDFVersionString(),
                    String.valueOf(pdfVersion));
        }
    }

    //Encryption test
    if (pddoc.isEncrypted()) {
        getEventProducer(eventBroadcaster).encryptedPdf(this);
        return null;
    }

    //Warn about potential problems with PDF/A and PDF/X
    if (pdfDoc.getProfile().isPDFAActive()) {
        getEventProducer(eventBroadcaster).pdfAActive(this);
    }
    if (pdfDoc.getProfile().isPDFXActive()) {
        getEventProducer(eventBroadcaster).pdfXActive(this);
    }

    Map<Object, Object> objectCache = getObjectCache(originalImageUri, userAgent);

    PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);

    // Give the target page its own resources, chained to the document ones
    if (targetPage.getPDFResources().getParentResources() == null) {
        PDFResources res = pdfDoc.getFactory().makeResources();
        res.setParentResources(pdfDoc.getResources());
        res.addContext(targetPage);
        targetPage.put("Resources", res);
    }

    PDFBoxAdapter adapter = new PDFBoxAdapter(targetPage, objectCache, pageNumbers);
    if (handler != null) {
        adapter.setCurrentMCID(handler.getPageParentTree().length());
    }
    String stream = adapter.createStreamFromPDFBoxPage(pddoc, page, originalImageUri, at, fontinfo, pos);
    // userAgent is treated as optional at the top of this method, so it must
    // not be dereferenced unconditionally here.
    if (userAgent != null && userAgent.isAccessibilityEnabled()) {
        TaggedPDFConductor conductor = new TaggedPDFConductor(curentSessionElem, handler, page, adapter);
        conductor.handleLogicalStructure(pddoc);
    }
    return stream;
}

From source file:org.apache.fop.render.pdf.pdfbox.PreloaderPDF.java

License:Apache License

/**
 * Builds the image information for one page of a PDF used as an image.
 * <p>
 * The page index is read from the URI. The image size is taken from the
 * page's crop box, or from its media box when there is no crop box, with
 * width and height exchanged for a normalized rotation of 90 or 270. The
 * loaded document is stored in the info's custom objects, wrapped in an
 * ImagePDF, and a marker is added when pages follow the selected one.
 *
 * @param uri the image URI, which carries the page index
 * @param src the source whose system id is used to derive the document URI
 * @param context the image context; supplies the source resolution
 * @return the image information for the selected page
 * @throws IOException declared by this method; presumably raised while
 *         obtaining the document
 * @throws ImageException if the selected page index is negative or not
 *         smaller than the page count
 */
private ImageInfo loadPDF(String uri, Source src, ImageContext context) throws IOException, ImageException {
    final int selectedPage = ImageUtil.needPageIndexFromURI(uri);
    final URI docURI = deriveDocumentURI(src.getSystemId());

    PDDocument loaded = getDocument(context, docURI, src);
    PDDocument pddoc = Interceptors.getInstance().interceptOnLoad(loaded, docURI);

    //Disable the warning about a missing close since we rely on the GC to decide when
    //the cached PDF shall be disposed of.
    pddoc.getDocument().setWarnMissingClose(false);

    int pageCount = pddoc.getNumberOfPages();
    boolean pageExists = selectedPage >= 0 && selectedPage < pageCount;
    if (!pageExists) {
        throw new ImageException("Selected page (index: " + selectedPage
                + ") does not exist in the PDF file. The document has " + pddoc.getNumberOfPages() + " pages.");
    }

    PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);
    PDRectangle mediaBox = page.getMediaBox();
    PDRectangle cropBox = page.getCropBox();
    PDRectangle viewBox = mediaBox;
    if (cropBox != null) {
        viewBox = cropBox;
    }
    int boxWidth = Math.round(viewBox.getWidth() * 1000);
    int boxHeight = Math.round(viewBox.getHeight() * 1000);

    //Handle the /Rotation entry on the page dict: a quarter turn swaps width and height
    int rotation = PDFUtil.getNormalizedRotation(page);
    boolean quarterTurn = rotation == 90 || rotation == 270;

    ImageSize size = new ImageSize();
    if (quarterTurn) {
        size.setSizeInMillipoints(boxHeight, boxWidth);
    } else {
        size.setSizeInMillipoints(boxWidth, boxHeight);
    }
    size.setResolution(context.getSourceResolution());
    size.calcPixelsFromSize();

    ImageInfo info = new ImageInfo(uri, ImagePDF.MIME_PDF);
    info.setSize(size);
    info.getCustomObjects().put(ImageInfo.ORIGINAL_IMAGE, new ImagePDF(info, pddoc));

    // Flag the presence of further pages after the selected one
    boolean morePagesFollow = selectedPage < pddoc.getNumberOfPages() - 1;
    if (morePagesFollow) {
        info.getCustomObjects().put(ImageInfo.HAS_MORE_IMAGES, Boolean.TRUE);
    }

    return info;
}

From source file:org.apache.padaf.preflight.ExtractStream.java

License:Apache License

/**
 * Extracts the unfiltered content of one stream object of a PDF into the
 * file "stream.out" in the working directory.
 * <p>
 * Nothing is written when the referenced object is not a stream.
 *
 * @param args exactly three values: the PDF file, the object number and the
 *        object generation number
 * @throws Exception if the PDF cannot be read, the numbers cannot be parsed
 *         or the copy fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("usage : ExtractStream file objNum objGen");
        // Stop here: the code below indexes args[0..2]
        return;
    }
    InputStream pdfInput = new FileInputStream(args[0]);
    PDDocument document = null;
    try {
        document = PDDocument.load(pdfInput);
        COSObject obj = document.getDocument()
                .getObjectFromPool(new COSObjectKey(Integer.parseInt(args[1]), Integer.parseInt(args[2])));
        if (obj.getObject() instanceof COSStream) {
            COSStream stream = (COSStream) obj.getObject();
            InputStream is = null;
            FileOutputStream out = null;
            try {
                is = stream.getUnfilteredStream();
                out = new FileOutputStream("stream.out");
                IOUtils.copyLarge(is, out);
            } finally {
                // Release both streams even when the copy fails
                IOUtils.closeQuietly(out);
                IOUtils.closeQuietly(is);
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
        IOUtils.closeQuietly(pdfInput);
    }
}

From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java

License:Apache License

/**
 * This method checks the content of each OutputIntent. The S entry must
 * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile
 * Stream./*from  w w  w. j a v  a 2s .co m*/
 * 
 * If there are more than one OutputIntent, they have to use the same ICC
 * Profile.
 * 
 * This method returns a list of ValidationError. It is empty if no errors
 * have been found.
 * 
 * @param handler
 * @return
 * @throws ValidationException
 */
public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException {
    List<ValidationError> result = new ArrayList<ValidationError>(0);
    PDDocument pdDocument = handler.getDocument();
    PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
    COSDocument cDoc = pdDocument.getDocument();

    COSBase cBase = catalog.getCOSDictionary()
            .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS));
    COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc);

    Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>();

    for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) {
        COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc);

        if (dictionary == null) {

            result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                    "OutputIntent object is null or isn't a dictionary"));

        } else {
            // ---- S entry is mandatory and must be equals to GTS_PDFA1
            String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S));
            if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) {
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID,
                        "The S entry of the OutputIntent isn't GTS_PDFA1"));
                continue;
            }

            // ---- OutputConditionIdentifier is a mandatory field
            String outputConditionIdentifier = dictionary
                    .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER));
            if (outputConditionIdentifier == null) {// empty string is autorized (it may be an application specific value)
                result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
                        "The OutputIntentCondition is missing"));
                continue;
            }

            // ---- If OutputConditionIdentifier is "Custom" or a non Standard ICC Characterization :
            // ---- DestOutputProfile and Info are mandatory
            // ---- DestOutputProfile must be a ICC Profile

            // ---- Because of PDF/A conforming file needs to specify the color characteristics, the DestOutputProfile
            // ---- is checked even if the OutputConditionIdentifier isn't "Custom"
            COSBase dop = dictionary
                    .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE));
            ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler);
            if (valer != null) {
                result.add(valer);
                continue;
            }

            // TODO [LAZY] When Lazy mode will be added, this block should be uncommented to set result as warning.
            //            if (!isStandardICCCharacterization(outputConditionIdentifier)) {
            //               String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO));
            //               if (info == null || "".equals(info)) {
            //                  result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY,
            //                        "The Info entry of a OutputIntent dictionary is missing"));
            //                  continue;
            //               }
            //            }
        }
    }
    return result;
}

From source file:org.apache.padaf.preflight.helpers.MetadataValidationHelper.java

License:Apache License

/**
 * Validates the XMP metadata of the document held by the given handler.
 * <p>
 * The xpacket is extracted from the low level document and parsed, and the
 * parsed metadata is stored on the handler. The xpacket attributes, the
 * metadata synchronization, the PDF/A identifier and the rdf:about
 * attributes are then checked.
 * <p>
 * Most parsing exceptions are converted into a list holding a single
 * validation error; the catch clauses below map each exception type to its
 * error code.
 *
 * @param handler gives access to the document and receives the parsed
 *        metadata
 * @return the validation errors found, or a single-element list describing
 *         the parsing failure
 * @throws ValidationException if the builder cannot be set up, if parsing
 *         fails with a schema or xpacket-end problem, or on an I/O failure
 */
public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException {
    // Set by whichever catch clause converts a parsing failure into an error
    ValidationError failure;
    try {
        PDDocument document = handler.getDocument();
        byte[] xpacket = getXpacket(document.getDocument());

        XMPDocumentBuilder builder;
        try {
            builder = new XMPDocumentBuilder();
            builder.addPreprocessor(new PDFAExtentionSchemaPreprocessor());
        } catch (XmpSchemaException e1) {
            throw new ValidationException(e1.getMessage(), e1);
        }

        XMPMetadata metadata;
        try {
            metadata = builder.parse(xpacket);
            handler.setMetadata(metadata);
        } catch (XmpSchemaException e) {
            throw new ValidationException("Parser: Internal Problem (failed to instanciate Schema object)", e);
        } catch (XmpXpacketEndException e) {
            throw new ValidationException("Unable to parse font metadata due to : " + e.getMessage(), e);
        }

        List<ValidationError> errors = new ArrayList<ValidationError>();

        // 6.7.5 no deprecated attribute in xpacket processing instruction
        if (metadata.getXpacketBytes() != null) {
            errors.add(new ValidationError(ValidationConstants.ERROR_METADATA_XPACKET_DEPRECATED,
                    "bytes attribute is forbidden"));
        }
        if (metadata.getXpacketEncoding() != null) {
            errors.add(new ValidationError(ValidationConstants.ERROR_METADATA_XPACKET_DEPRECATED,
                    "encoding attribute is forbidden"));
        }

        // Metadata synchronization check
        errors.addAll(new SynchronizedMetaDataValidation().validateMetadataSynchronization(document, metadata));

        // PDF/A identifier check
        errors.addAll(new PDFAIdentificationValidation().validatePDFAIdentifer(metadata));

        // rdf:about check
        try {
            new RDFAboutAttributeConcordanceValidation().validateRDFAboutAttributes(metadata);
        } catch (DifferentRDFAboutException e) {
            errors.add(new ValidationError(ValidationConstants.ERROR_METADATA_RDF_ABOUT_ATTRIBUTE_INEQUAL_VALUE,
                    e.getMessage()));
        }

        return errors;
    } catch (XpacketParsingException e) {
        if (e.getError() != null) {
            failure = e.getError();
        } else {
            failure = new ValidationError(ValidationConstants.ERROR_METADATA_MAIN, "Unexpected error");
        }
    } catch (XmpPropertyFormatException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_FORMAT, e.getMessage());
    } catch (BadFieldValueException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_CATEGORY_PROPERTY_INVALID,
                e.getMessage());
    } catch (XmpExpectedRdfAboutAttribute e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_RDF_ABOUT_ATTRIBUTE_MISSING,
                e.getMessage());
    } catch (XmpUnknownPropertyException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_UNKNOWN, e.getMessage());
    } catch (XmpUnknownSchemaException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA,
                e.getMessage());
    } catch (XmpUnexpectedNamespaceURIException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_WRONG_NS_URI, e.getMessage());
    } catch (XmpUnexpectedNamespacePrefixException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA,
                e.getMessage());
    } catch (XmpRequiredPropertyException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING, e.getMessage());
    } catch (XmpUnknownValueTypeException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_UNKNOWN_VALUETYPE, e.getMessage());
    } catch (XmpParsingException e) {
        failure = new ValidationError(ValidationConstants.ERROR_METADATA_FORMAT, e.getMessage());
    } catch (IOException e) {
        throw new ValidationException("Failed while validating", e);
    }

    // Only reached from a catch clause: report that single failure
    List<ValidationError> singleFailure = new ArrayList<ValidationError>();
    singleFailure.add(failure);
    return singleFailure;
}

From source file:org.apache.padaf.preflight.helpers.TrailerValidationHelper.java

License:Apache License

/**
 * Validates the trailer(s) of the PDF held by the given handler.
 * <p>
 * For a PDF that is not linearized only the document trailer is checked.
 * For a linearized PDF the linearized dictionary is checked, then the
 * trailers are checked by one of two routines selected on the header
 * string: according to the PDF version, trailers are available through the
 * trailer keyword (PDF &lt;= 1.4) or in the dictionary of the XRef stream
 * (PDF &gt;= 1.5).
 *
 * @param handler gives access to the document being validated
 * @return the validation errors found; empty when none was found
 * @throws ValidationException declared by the overridden method
 */
@Override
public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException {

    final List<ValidationError> result = new ArrayList<ValidationError>(0);
    final PDDocument pdfDoc = handler.getDocument();
    final COSDictionary linearizedDict = isLinearizedPdf(pdfDoc);

    if (linearizedDict == null) {
        // Not a linearized PDF: only the last trailer must be checked
        checkMainTrailer(pdfDoc.getDocument(), pdfDoc.getDocument().getTrailer(), result);
        return result;
    }

    // Linearized PDF: check the linearized dictionary first
    checkLinearizedDictionnary(linearizedDict, result);

    // The first trailer must be checked and must carry the same ID as the
    // last trailer; where the trailers live depends on the PDF version.
    String header = pdfDoc.getDocument().getHeaderString();
    boolean usesTrailerKeyword = header != null && header.matches("%PDF-1\\.[1-4]");
    if (usesTrailerKeyword) {
        checkTrailersForLinearizedPDF14(handler, result);
    } else {
        checkTrailersForLinearizedPDF15(handler, result);
    }

    return result;
}