List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.getDocument()
public COSDocument getDocument()
From source file:net.padaf.preflight.helpers.TrailerValidationHelper.java
License:Apache License
@Override public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException { List<ValidationError> result = new ArrayList<ValidationError>(0); PDDocument pdfDoc = handler.getDocument(); COSDictionary linearizedDict = isLinearizedPdf(pdfDoc); if (linearizedDict != null) { // it is a linearized PDF, check the linearized dictionary checkLinearizedDictionnary(linearizedDict, result); // if the pdf is a linearized pdf. the first trailer must be checked // and it must have the same ID than the last trailer. List<String> lTrailers = handler.getPdfExtractor().getAllTrailers(); String firstTrailer = lTrailers.get(0); String lastTrailer = lTrailers.get(lTrailers.size() - 1); COSDictionary first = null;//from ww w . j ava 2s . com COSDictionary last = null; COSDocument cd = null; try { cd = new COSDocument(); PdfElementParser parser1 = new PdfElementParser(cd, firstTrailer.getBytes()); first = parser1.parseAsDictionary(); PdfElementParser parser2 = new PdfElementParser(cd, lastTrailer.getBytes()); last = parser2.parseAsDictionary(); checkMainTrailer(pdfDoc.getDocument(), first, result); if (!compareIds(first, last, pdfDoc.getDocument())) { result.add(new ValidationResult.ValidationError( ValidationConstants.ERROR_SYNTAX_TRAILER_ID_CONSISTENCY, "ID is different in the first and the last trailer")); } } catch (IOException e) { result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER, "Unable to parse trailers of the linearized PDF")); } finally { COSUtils.closeDocumentQuietly(cd); } } else { // If the PDF isn't a linearized one, only the last trailer must be // checked List<String> lTrailers = handler.getPdfExtractor().getAllTrailers(); String lastTrailer = lTrailers.get(lTrailers.size() - 1); COSDocument cd = null; try { cd = new COSDocument(); PdfElementParser parser = new PdfElementParser(cd, lastTrailer.getBytes()); COSDictionary trailer = parser.parseAsDictionary(); checkMainTrailer(pdfDoc.getDocument(), 
trailer, result); } catch (IOException e) { result.add(new ValidationResult.ValidationError(ValidationConstants.ERROR_SYNTAX_TRAILER, "The trailer dictionary is missing")); } finally { try { cd.close(); } catch (IOException e) { COSUtils.closeDocumentQuietly(cd); } } } return result; }
From source file:net.padaf.preflight.helpers.TrailerValidationHelper.java
License:Apache License
/** * According to the PDF Reference, A linearized PDF contain a dictionary as * first object (linearized dictionary) and only this one in the first * section./*from w w w . j a v a 2 s . c om*/ * * @param document * @return */ protected COSDictionary isLinearizedPdf(PDDocument document) { // ---- Get Ref to obj COSDocument cDoc = document.getDocument(); List<?> lObj = cDoc.getObjects(); for (Object object : lObj) { COSBase curObj = ((COSObject) object).getObject(); if (curObj instanceof COSDictionary && ((COSDictionary) curObj).keySet().contains(COSName.getPDFName(DICTIONARY_KEY_LINEARIZED))) { return (COSDictionary) curObj; } } return null; }
From source file:net.padaf.preflight.utils.COSUtils.java
License:Apache License
/** * Close the given Document. If the close method of the document throws an * exception, it is logged using a log4j logger (Level : WARN) * /*from w ww. j a v a2s .c o m*/ * @param document */ public static void closeDocumentQuietly(PDDocument document) { if (document != null) { closeDocumentQuietly(document.getDocument()); } }
From source file:no.digipost.print.validate.PdfValidator.java
License:Apache License
/** * Leser hele dokumentet inn i minnet/*from www.java2 s. co m*/ */ private List<PdfValidationError> validerDokumentForPrint(final PDDocument pdDoc, final PdfValidationSettings innstillinger) throws IOException { List<PdfValidationError> errors = new ArrayList<>(); if (pdDoc.isEncrypted()) { return failValidationIfEncrypted(errors); } if (innstillinger.validerSideantall) { validerSideantall(pdDoc.getNumberOfPages(), errors); } if (innstillinger.validerPDFversjon) { validerPdfVersjon(pdDoc.getDocument().getVersion(), errors); } boolean dokumentHarUgyldigeDimensjoner = false; for (PDPage page : getAllPagesFrom(pdDoc)) { if (harUgyldigeDimensjoner(page)) { dokumentHarUgyldigeDimensjoner = true; break; } } leggTilValideringsfeil(dokumentHarUgyldigeDimensjoner, UNSUPPORTED_DIMENSIONS, errors); boolean harTekstIStrekkodeomraade = false; boolean dokumentHarSiderHvisMarginIkkeLarSegVerifisereForPrint = false; if (innstillinger.validerVenstremarg) { for (PDPage page : getAllPagesFrom(pdDoc)) { try { if (harTekstIStrekkodeomraade(page)) { harTekstIStrekkodeomraade = true; break; } } catch (NullPointerException npe) { dokumentHarSiderHvisMarginIkkeLarSegVerifisereForPrint = true; LOG.info("Klarte ikke verifiserere margen p en side"); } } } leggTilValideringsfeil(dokumentHarSiderHvisMarginIkkeLarSegVerifisereForPrint, UNABLE_TO_VERIFY_SUITABLE_MARGIN_FOR_PRINT, errors); leggTilValideringsfeil(harTekstIStrekkodeomraade, INSUFFICIENT_MARGIN_FOR_PRINT, errors); if (innstillinger.validerFonter) { for (PDPage page : getAllPagesFrom(pdDoc)) { validerFonter(fontValidator.getPageFonts(page), errors); } } return errors; }
From source file:org.apache.fop.render.pdf.pdfbox.AbstractPDFBoxHandler.java
License:Apache License
/**
 * Builds the content stream for the selected page of an embedded PDF image and
 * registers the resources it needs on the target page.
 * <p>
 * The target document's PDF version is raised to the version of the embedded
 * document when that one is newer; if it cannot be raised, a version-mismatch
 * event is produced. Events are also produced when PDF/A or PDF/X is active on
 * the target document. When accessibility is enabled on the user agent, the
 * logical structure of the embedded document is handled as well.
 *
 * @param image             the PDF image; supplies the source document and
 *                          the original URI (which carries the page index)
 * @param targetPage        the page of the output document receiving the
 *                          content
 * @param userAgent         the user agent; may be {@code null}, in which case
 *                          no event broadcaster is used and accessibility
 *                          handling is skipped
 * @param at                transform passed through to the PDFBox adapter
 * @param fontinfo          font information passed through to the PDFBox
 *                          adapter
 * @param pos               rectangle passed through to the PDFBox adapter
 * @param pageNumbers       page-number map handed to the PDFBox adapter
 * @param handler           logical structure handler; may be {@code null}
 * @param curentSessionElem structure element handed to the tagged-PDF
 *                          conductor
 * @return the stream created by the adapter, or {@code null} when the
 *         embedded document is encrypted
 * @throws IOException declared for the PDFBox and adapter calls made here
 */
protected String createStreamForPDF(ImagePDF image, PDFPage targetPage, FOUserAgent userAgent,
        AffineTransform at, FontInfo fontinfo, Rectangle pos, Map<Integer, PDFArray> pageNumbers,
        PDFLogicalStructureHandler handler, PDFStructElem curentSessionElem) throws IOException {

    EventBroadcaster eventBroadcaster = null;
    if (userAgent != null) {
        eventBroadcaster = userAgent.getEventBroadcaster();
    }

    String originalImageUri = image.getInfo().getOriginalURI();
    final int selectedPage = ImageUtil.needPageIndexFromURI(originalImageUri);

    PDDocument pddoc = image.getPDDocument();
    float pdfVersion = pddoc.getDocument().getVersion();
    Version inputDocVersion = Version.getValueOf(String.valueOf(pdfVersion));
    PDFDocument pdfDoc = targetPage.getDocument();

    if (pdfDoc.getPDFVersion().compareTo(inputDocVersion) < 0) {
        try {
            pdfDoc.setPDFVersion(inputDocVersion);
        } catch (IllegalStateException e) {
            // The version could not be raised any more; report the mismatch
            // rather than failing.
            getEventProducer(eventBroadcaster).pdfVersionMismatch(this,
                    pdfDoc.getPDFVersionString(), String.valueOf(pdfVersion));
        }
    }

    //Encryption test
    if (pddoc.isEncrypted()) {
        getEventProducer(eventBroadcaster).encryptedPdf(this);
        return null;
    }

    //Warn about potential problems with PDF/A and PDF/X
    if (pdfDoc.getProfile().isPDFAActive()) {
        getEventProducer(eventBroadcaster).pdfAActive(this);
    }
    if (pdfDoc.getProfile().isPDFXActive()) {
        getEventProducer(eventBroadcaster).pdfXActive(this);
    }

    // NOTE(review): getObjectCache also receives the possibly-null userAgent;
    // its null handling is not visible here - confirm.
    Map<Object, Object> objectCache = getObjectCache(originalImageUri, userAgent);

    PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage);

    if (targetPage.getPDFResources().getParentResources() == null) {
        PDFResources res = pdfDoc.getFactory().makeResources();
        res.setParentResources(pdfDoc.getResources());
        res.addContext(targetPage);
        targetPage.put("Resources", res);
    }

    PDFBoxAdapter adapter = new PDFBoxAdapter(targetPage, objectCache, pageNumbers);
    if (handler != null) {
        adapter.setCurrentMCID(handler.getPageParentTree().length());
    }
    String stream = adapter.createStreamFromPDFBoxPage(pddoc, page, originalImageUri, at, fontinfo, pos);

    // userAgent is treated as optional at the top of this method, so it must
    // not be dereferenced unguarded here.
    if (userAgent != null && userAgent.isAccessibilityEnabled()) {
        TaggedPDFConductor conductor = new TaggedPDFConductor(curentSessionElem, handler, page, adapter);
        conductor.handleLogicalStructure(pddoc);
    }
    return stream;
}
From source file:org.apache.fop.render.pdf.pdfbox.PreloaderPDF.java
License:Apache License
private ImageInfo loadPDF(String uri, Source src, ImageContext context) throws IOException, ImageException { int selectedPage = ImageUtil.needPageIndexFromURI(uri); URI docURI = deriveDocumentURI(src.getSystemId()); PDDocument pddoc = getDocument(context, docURI, src); pddoc = Interceptors.getInstance().interceptOnLoad(pddoc, docURI); //Disable the warning about a missing close since we rely on the GC to decide when //the cached PDF shall be disposed off. pddoc.getDocument().setWarnMissingClose(false); int pageCount = pddoc.getNumberOfPages(); if (selectedPage < 0 || selectedPage >= pageCount) { throw new ImageException("Selected page (index: " + selectedPage + ") does not exist in the PDF file. The document has " + pddoc.getNumberOfPages() + " pages."); }//from w w w . j a v a 2 s .co m PDPage page = pddoc.getDocumentCatalog().getPages().get(selectedPage); PDRectangle mediaBox = page.getMediaBox(); PDRectangle cropBox = page.getCropBox(); PDRectangle viewBox = cropBox != null ? cropBox : mediaBox; int w = Math.round(viewBox.getWidth() * 1000); int h = Math.round(viewBox.getHeight() * 1000); //Handle the /Rotation entry on the page dict int rotation = PDFUtil.getNormalizedRotation(page); if (rotation == 90 || rotation == 270) { //Swap width and height int exch = w; w = h; h = exch; } ImageSize size = new ImageSize(); size.setSizeInMillipoints(w, h); size.setResolution(context.getSourceResolution()); size.calcPixelsFromSize(); ImageInfo info = new ImageInfo(uri, ImagePDF.MIME_PDF); info.setSize(size); info.getCustomObjects().put(ImageInfo.ORIGINAL_IMAGE, new ImagePDF(info, pddoc)); int lastPageIndex = pddoc.getNumberOfPages() - 1; if (selectedPage < lastPageIndex) { info.getCustomObjects().put(ImageInfo.HAS_MORE_IMAGES, Boolean.TRUE); } return info; }
From source file:org.apache.padaf.preflight.ExtractStream.java
License:Apache License
/**
 * Extracts the unfiltered content of one stream object of a PDF file into the
 * file {@code stream.out} in the current directory.
 * <p>
 * Nothing is written when the addressed object is not a stream.
 *
 * @param args exactly three values: the PDF file path, the object number and
 *             the object generation
 * @throws Exception if the file cannot be read, the numbers cannot be parsed,
 *                   or the stream cannot be copied
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("usage : ExtractStream file objNum objGen");
        // Stop here: continuing would fail on args[0] with an
        // ArrayIndexOutOfBoundsException.
        return;
    }

    InputStream pdfInput = new FileInputStream(args[0]);
    PDDocument document = null;
    try {
        document = PDDocument.load(pdfInput);
        COSObject obj = document.getDocument()
                .getObjectFromPool(new COSObjectKey(Integer.parseInt(args[1]), Integer.parseInt(args[2])));
        if (obj.getObject() instanceof COSStream) {
            COSStream stream = (COSStream) obj.getObject();
            InputStream is = null;
            FileOutputStream out = null;
            try {
                is = stream.getUnfilteredStream();
                out = new FileOutputStream("stream.out");
                IOUtils.copyLarge(is, out);
            } finally {
                // Release both ends of the copy even when the copy fails.
                IOUtils.closeQuietly(out);
                IOUtils.closeQuietly(is);
            }
        }
    } finally {
        // The document and its input stream are released on every path.
        if (document != null) {
            document.close();
        }
        IOUtils.closeQuietly(pdfInput);
    }
}
From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java
License:Apache License
/** * This method checks the content of each OutputIntent. The S entry must * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile * Stream./*from w w w. j a v a 2s .co m*/ * * If there are more than one OutputIntent, they have to use the same ICC * Profile. * * This method returns a list of ValidationError. It is empty if no errors * have been found. * * @param handler * @return * @throws ValidationException */ public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException { List<ValidationError> result = new ArrayList<ValidationError>(0); PDDocument pdDocument = handler.getDocument(); PDDocumentCatalog catalog = pdDocument.getDocumentCatalog(); COSDocument cDoc = pdDocument.getDocument(); COSBase cBase = catalog.getCOSDictionary() .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS)); COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc); Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>(); for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) { COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc); if (dictionary == null) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "OutputIntent object is null or isn't a dictionary")); } else { // ---- S entry is mandatory and must be equals to GTS_PDFA1 String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S)); if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID, "The S entry of the OutputIntent isn't GTS_PDFA1")); continue; } // ---- OutputConditionIdentifier is a mandatory field String outputConditionIdentifier = dictionary .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER)); if (outputConditionIdentifier == null) {// empty string is autorized (it may be an application specific 
value) result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "The OutputIntentCondition is missing")); continue; } // ---- If OutputConditionIdentifier is "Custom" or a non Standard ICC Characterization : // ---- DestOutputProfile and Info are mandatory // ---- DestOutputProfile must be a ICC Profile // ---- Because of PDF/A conforming file needs to specify the color characteristics, the DestOutputProfile // ---- is checked even if the OutputConditionIdentifier isn't "Custom" COSBase dop = dictionary .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE)); ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler); if (valer != null) { result.add(valer); continue; } // TODO [LAZY] When Lazy mode will be added, this block should be uncommented to set result as warning. // if (!isStandardICCCharacterization(outputConditionIdentifier)) { // String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO)); // if (info == null || "".equals(info)) { // result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, // "The Info entry of a OutputIntent dictionary is missing")); // continue; // } // } } } return result; }
From source file:org.apache.padaf.preflight.helpers.MetadataValidationHelper.java
License:Apache License
public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException { try {/*from www .j a v a2 s . com*/ PDDocument document = handler.getDocument(); byte[] tmp = getXpacket(document.getDocument()); XMPDocumentBuilder builder; try { builder = new XMPDocumentBuilder(); builder.addPreprocessor(new PDFAExtentionSchemaPreprocessor()); } catch (XmpSchemaException e1) { throw new ValidationException(e1.getMessage(), e1); } XMPMetadata metadata; try { metadata = builder.parse(tmp); handler.setMetadata(metadata); } catch (XmpSchemaException e) { throw new ValidationException("Parser: Internal Problem (failed to instanciate Schema object)", e); } catch (XmpXpacketEndException e) { throw new ValidationException("Unable to parse font metadata due to : " + e.getMessage(), e); } List<ValidationError> lve = new ArrayList<ValidationError>(); // 6.7.5 no deprecated attribute in xpacket processing instruction if (metadata.getXpacketBytes() != null) { lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_XPACKET_DEPRECATED, "bytes attribute is forbidden")); } if (metadata.getXpacketEncoding() != null) { lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_XPACKET_DEPRECATED, "encoding attribute is forbidden")); } // Call metadata synchronization checking lve.addAll(new SynchronizedMetaDataValidation().validateMetadataSynchronization(document, metadata)); // Call PDF/A Identifier checking lve.addAll(new PDFAIdentificationValidation().validatePDFAIdentifer(metadata)); // Call rdf:about checking try { new RDFAboutAttributeConcordanceValidation().validateRDFAboutAttributes(metadata); } catch (DifferentRDFAboutException e) { lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_RDF_ABOUT_ATTRIBUTE_INEQUAL_VALUE, e.getMessage())); } return lve; } catch (XpacketParsingException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); if (e.getError() != null) { lve.add(e.getError()); } else { lve.add(new 
ValidationError(ValidationConstants.ERROR_METADATA_MAIN, "Unexpected error")); } return lve; } catch (XmpPropertyFormatException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_FORMAT, e.getMessage())); return lve; } catch (BadFieldValueException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_CATEGORY_PROPERTY_INVALID, e.getMessage())); return lve; } catch (XmpExpectedRdfAboutAttribute e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_RDF_ABOUT_ATTRIBUTE_MISSING, e.getMessage())); return lve; } catch (XmpUnknownPropertyException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_UNKNOWN, e.getMessage())); return lve; } catch (XmpUnknownSchemaException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA, e.getMessage())); return lve; } catch (XmpUnexpectedNamespaceURIException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_WRONG_NS_URI, e.getMessage())); return lve; } catch (XmpUnexpectedNamespacePrefixException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_ABSENT_DESCRIPTION_SCHEMA, e.getMessage())); return lve; } catch (XmpRequiredPropertyException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_PROPERTY_MISSING, e.getMessage())); return lve; } catch (XmpUnknownValueTypeException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new 
ValidationError(ValidationConstants.ERROR_METADATA_UNKNOWN_VALUETYPE, e.getMessage())); return lve; } catch (XmpParsingException e) { List<ValidationError> lve = new ArrayList<ValidationError>(); lve.add(new ValidationError(ValidationConstants.ERROR_METADATA_FORMAT, e.getMessage())); return lve; } catch (IOException e) { throw new ValidationException("Failed while validating", e); } }
From source file:org.apache.padaf.preflight.helpers.TrailerValidationHelper.java
License:Apache License
@Override public List<ValidationError> innerValidate(DocumentHandler handler) throws ValidationException { List<ValidationError> result = new ArrayList<ValidationError>(0); PDDocument pdfDoc = handler.getDocument(); COSDictionary linearizedDict = isLinearizedPdf(pdfDoc); if (linearizedDict != null) { // it is a linearized PDF, check the linearized dictionary checkLinearizedDictionnary(linearizedDict, result); // if the pdf is a linearized pdf. the first trailer must be checked // and it must have the same ID than the last trailer. // According to the PDF version, trailers are available by the trailer key word (pdf <= 1.4) // or in the dictionary of the XRef stream ( PDF >= 1.5) String pdfVersion = pdfDoc.getDocument().getHeaderString(); if (pdfVersion != null && pdfVersion.matches("%PDF-1\\.[1-4]")) { checkTrailersForLinearizedPDF14(handler, result); } else {/*from w w w . j a v a 2 s .c om*/ checkTrailersForLinearizedPDF15(handler, result); } } else { // If the PDF isn't a linearized one, only the last trailer must be checked checkMainTrailer(pdfDoc.getDocument(), pdfDoc.getDocument().getTrailer(), result); } return result; }