List of usage examples for org.apache.pdfbox.pdmodel.PDDocument#getDocumentCatalog()
public PDDocumentCatalog getDocumentCatalog()
From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java
License:Apache License
/**
 * Fetches a font dictionary from the resources of the first page of a document.
 *
 * @param doc the document whose first page (index 0) is inspected
 * @param internalname key of the wanted entry inside the page's {@code /Font} resource dictionary
 * @return the entry stored under {@code internalname}, cast to {@link COSDictionary}
 * @throws IOException declared on the signature for the underlying PDFBox calls
 */
private COSDictionary getFont(PDDocument doc, String internalname) throws IOException {
    PDPage firstPage = (PDPage) doc.getDocumentCatalog().getPages().get(0);
    // The /Font entry of the page resources maps internal font names to font dictionaries.
    COSDictionary fontTable = (COSDictionary) firstPage.getResources().getCOSObject()
            .getDictionaryObject(COSName.FONT);
    return (COSDictionary) fontTable.getDictionaryObject(internalname);
}
From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java
License:Apache License
/**
 * Converts the first page of the {@code ROTATE} resource into a content stream and checks
 * both the transform written back into {@code at} and the renamed resource keys in the stream.
 *
 * @throws Exception if the resource cannot be loaded or the conversion fails
 */
@Test
public void testStream() throws Exception {
    PDFDocument pdfdoc = new PDFDocument("");
    // "r" here is the fixture field of the test class, used for all four page boxes.
    PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
    pdfpage.setDocument(pdfdoc);
    PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), new HashMap<Integer, PDFArray>());
    PDDocument doc = getResource(ROTATE);
    try {
        PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0);
        AffineTransform at = new AffineTransform();
        // Named distinctly so it no longer shadows the field "r" used above.
        Rectangle pageRect = new Rectangle(0, 1650, 842000, 595000);
        String stream = adapter.createStreamFromPDFBoxPage(doc, page, "key", at, null, pageRect);
        // JUnit convention: expected value first, actual value second.
        Assert.assertEquals(new AffineTransform(-0.0, 1.0000000554888686, 1.0000000554888686, 0.0, 0.0,
                -2.0742416381835938E-5), at);
        Assert.assertTrue(stream.contains("/GS0106079 gs"));
        Assert.assertTrue(stream.contains("/TT0106079 1 Tf"));
    } finally {
        // Close the source document even when an assertion or the conversion fails.
        doc.close();
    }
}
From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java
License:Apache License
/**
 * Converts the first page of the {@code HELLOTagged} resource after setting the adapter's
 * current MCID to 5, and checks that the produced stream contains a {@code /P} marked-content
 * sequence carrying that MCID.
 *
 * @throws IOException if the resource cannot be loaded or the conversion fails
 */
@Test
public void testTaggedPDFWriter() throws IOException {
    PDFDocument pdfdoc = new PDFDocument("");
    // "r" here is the fixture field of the test class, used for all four page boxes.
    PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r);
    pdfpage.setDocument(pdfdoc);
    PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), new HashMap<Integer, PDFArray>());
    adapter.setCurrentMCID(5);
    PDDocument doc = getResource(HELLOTagged);
    try {
        PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0);
        AffineTransform at = new AffineTransform();
        // Named distinctly so it no longer shadows the field "r" used above.
        Rectangle pageRect = new Rectangle(0, 1650, 842000, 595000);
        String stream = adapter.createStreamFromPDFBoxPage(doc, page, "key", at, null, pageRect);
        // The stream itself is the failure message so a mismatch is easy to diagnose.
        Assert.assertTrue(stream, stream.contains("/P <</MCID 5 >>BDC"));
    } finally {
        // Close the source document even when the assertion or the conversion fails.
        doc.close();
    }
}
From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java
License:Apache License
@Test public void testLink() throws Exception { PDFDocument pdfdoc = new PDFDocument(""); PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r); pdfpage.setDocument(pdfdoc);//www. ja v a2 s .co m pdfpage.setObjectNumber(1); Map<Integer, PDFArray> pageNumbers = new HashMap<Integer, PDFArray>(); PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), pageNumbers); PDDocument doc = getResource(LINK); PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0); AffineTransform at = new AffineTransform(); Rectangle r = new Rectangle(0, 1650, 842000, 595000); String stream = adapter.createStreamFromPDFBoxPage(doc, page, "key", at, null, r); Assert.assertTrue(stream.contains("/Link <</MCID 5 >>BDC")); Assert.assertEquals(pageNumbers.size(), 4); PDFAnnotList annots = (PDFAnnotList) pdfpage.get("Annots"); Assert.assertEquals(annots.toPDFString(), "[\n1 0 R\n2 0 R\n]"); doc.close(); }
From source file:org.apache.fop.render.pdf.PDFBoxAdapterTestCase.java
License:Apache License
@Test public void testXform() throws Exception { PDFDocument pdfdoc = new PDFDocument(""); pdfdoc.getFilterMap().put(PDFFilterList.DEFAULT_FILTER, Arrays.asList("null")); pdfdoc.setMergeFontsEnabled(true);// w ww. j a v a 2 s .co m PDFPage pdfpage = new PDFPage(new PDFResources(pdfdoc), 0, r, r, r, r); pdfpage.setDocument(pdfdoc); pdfpage.setObjectNumber(1); Map<Integer, PDFArray> pageNumbers = new HashMap<Integer, PDFArray>(); PDFBoxAdapter adapter = new PDFBoxAdapter(pdfpage, new HashMap(), pageNumbers); PDDocument doc = getResource(XFORM); PDPage page = (PDPage) doc.getDocumentCatalog().getPages().get(0); AffineTransform at = new AffineTransform(); Rectangle r = new Rectangle(0, 1650, 842000, 595000); adapter.createStreamFromPDFBoxPage(doc, page, "key", at, new FontInfo(), r); doc.close(); ByteArrayOutputStream bos = new ByteArrayOutputStream(); pdfdoc.output(bos); Assert.assertFalse(bos.toString("UTF-8").contains("/W 5 /H 5 /BPC 8 /CS /RGB ID ")); }
From source file:org.apache.padaf.preflight.helpers.CatalogValidationHelper.java
License:Apache License
/** * This method checks the content of each OutputIntent. The S entry must * contain GTS_PDFA1. The DestOuputProfile must contain a valid ICC Profile * Stream./* w w w. j ava 2 s . c om*/ * * If there are more than one OutputIntent, they have to use the same ICC * Profile. * * This method returns a list of ValidationError. It is empty if no errors * have been found. * * @param handler * @return * @throws ValidationException */ public List<ValidationError> validateOutputIntent(DocumentHandler handler) throws ValidationException { List<ValidationError> result = new ArrayList<ValidationError>(0); PDDocument pdDocument = handler.getDocument(); PDDocumentCatalog catalog = pdDocument.getDocumentCatalog(); COSDocument cDoc = pdDocument.getDocument(); COSBase cBase = catalog.getCOSDictionary() .getItem(COSName.getPDFName(DOCUMENT_DICTIONARY_KEY_OUTPUT_INTENTS)); COSArray outputIntents = COSUtils.getAsArray(cBase, cDoc); Map<COSObjectKey, Boolean> tmpDestOutputProfile = new HashMap<COSObjectKey, Boolean>(); for (int i = 0; outputIntents != null && i < outputIntents.size(); ++i) { COSDictionary dictionary = COSUtils.getAsDictionary(outputIntents.get(i), cDoc); if (dictionary == null) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "OutputIntent object is null or isn't a dictionary")); } else { // ---- S entry is mandatory and must be equals to GTS_PDFA1 String sValue = dictionary.getNameAsString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_S)); if (!OUTPUT_INTENT_DICTIONARY_VALUE_GTS_PDFA1.equals(sValue)) { result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_S_VALUE_INVALID, "The S entry of the OutputIntent isn't GTS_PDFA1")); continue; } // ---- OutputConditionIdentifier is a mandatory field String outputConditionIdentifier = dictionary .getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_OUTPUT_CONDITION_IDENTIFIER)); if (outputConditionIdentifier == null) {// empty string is autorized (it may be an application specific value) 
result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, "The OutputIntentCondition is missing")); continue; } // ---- If OutputConditionIdentifier is "Custom" or a non Standard ICC Characterization : // ---- DestOutputProfile and Info are mandatory // ---- DestOutputProfile must be a ICC Profile // ---- Because of PDF/A conforming file needs to specify the color characteristics, the DestOutputProfile // ---- is checked even if the OutputConditionIdentifier isn't "Custom" COSBase dop = dictionary .getItem(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_DEST_OUTPUT_PROFILE)); ValidationError valer = validateICCProfile(dop, cDoc, tmpDestOutputProfile, handler); if (valer != null) { result.add(valer); continue; } // TODO [LAZY] When Lazy mode will be added, this block should be uncommented to set result as warning. // if (!isStandardICCCharacterization(outputConditionIdentifier)) { // String info = dictionary.getString(COSName.getPDFName(OUTPUT_INTENT_DICTIONARY_KEY_INFO)); // if (info == null || "".equals(info)) { // result.add(new ValidationError(ERROR_GRAPHIC_OUTPUT_INTENT_INVALID_ENTRY, // "The Info entry of a OutputIntent dictionary is missing")); // continue; // } // } } } return result; }
From source file:org.apache.padaf.preflight.RetrieveMissingStream.java
License:Apache License
/**
 * Command-line entry point: loads the PDF named by the single argument, collects the object
 * keys of all COS objects that wrap a stream, then walks every page of the catalog.
 *
 * <p>Exits with status 233 and a usage message when the argument count is not exactly one.
 *
 * @param args exactly one element: the path of the PDF file to inspect
 * @throws Exception if the file cannot be opened or parsed
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage : RetrieveMissingStream file");
        System.exit(233);
    }

    FileInputStream input = new FileInputStream(args[0]);
    try {
        PDDocument document = PDDocument.load(input);
        try {
            // NOTE(review): the collected keys are never read afterwards; this tool looks unfinished.
            HashSet<COSObjectKey> listOfKeys = new HashSet<COSObjectKey>();
            List<COSObject> lCosObj = document.getDocument().getObjects();
            for (COSObject cosObject : lCosObj) {
                if (cosObject.getObject() instanceof COSStream) {
                    listOfKeys.add(new COSObjectKey(cosObject.getObjectNumber().intValue(),
                            cosObject.getGenerationNumber().intValue()));
                }
            }

            PDDocumentCatalog catalog = document.getDocumentCatalog();
            List<?> pages = catalog.getAllPages();
            for (int i = 0; i < pages.size(); ++i) {
                PDPage pdp = (PDPage) pages.get(i);
                // NOTE(review): both values are fetched but unused; the calls are kept so that any
                // exception they raise on a broken page is still surfaced as before.
                PDStream pdStream = pdp.getContents();
                COSBase b = pdp.getCOSDictionary().getItem(COSName.getPDFName("Contents"));
                System.out.println();
            }
        } finally {
            // Release the resources held by the parsed document.
            document.close();
        }
    } finally {
        // Always release the file handle, including when loading fails.
        input.close();
    }
}
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java
License:Apache License
private void extractEmbeddedDocuments(PDDocument document) throws IOException, SAXException, TikaException { PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(document.getDocumentCatalog()); PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles(); if (efTree == null) { return;//from ww w . jav a2 s .c om } Map<String, PDComplexFileSpecification> embeddedFileNames = efTree.getNames(); //For now, try to get the embeddedFileNames out of embeddedFiles or its kids. //This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java //If there is a need we could add a fully recursive search to find a non-null //Map<String, COSObjectable> that contains the doc info. if (embeddedFileNames != null) { processEmbeddedDocNames(embeddedFileNames); } else { List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids(); if (kids == null) { return; } for (PDNameTreeNode<PDComplexFileSpecification> node : kids) { embeddedFileNames = node.getNames(); if (embeddedFileNames != null) { processEmbeddedDocNames(embeddedFileNames); } } } }
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java
License:Apache License
@Override protected void startDocument(PDDocument pdf) throws IOException { try {//w ww. jav a 2s.co m xhtml.startDocument(); handleDestinationOrAction(pdf.getDocumentCatalog().getOpenAction(), ActionTrigger.DOCUMENT_OPEN); } catch (TikaException | SAXException e) { throw new IOExceptionWithCause("Unable to start a document", e); } }
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java
License:Apache License
@Override protected void endDocument(PDDocument pdf) throws IOException { try {/*from ww w. ja v a 2 s. c o m*/ // Extract text for any bookmarks: extractBookmarkText(); try { extractEmbeddedDocuments(pdf); } catch (IOException e) { handleCatchableIOE(e); } //extract acroform data at end of doc if (config.getExtractAcroFormContent() == true) { try { extractAcroForm(pdf); } catch (IOException e) { handleCatchableIOE(e); } } PDDocumentCatalogAdditionalActions additionalActions = pdf.getDocumentCatalog().getActions(); handleDestinationOrAction(additionalActions.getDP(), ActionTrigger.AFTER_DOCUMENT_PRINT); handleDestinationOrAction(additionalActions.getDS(), ActionTrigger.AFTER_DOCUMENT_SAVE); handleDestinationOrAction(additionalActions.getWC(), ActionTrigger.BEFORE_DOCUMENT_CLOSE); handleDestinationOrAction(additionalActions.getWP(), ActionTrigger.BEFORE_DOCUMENT_PRINT); handleDestinationOrAction(additionalActions.getWS(), ActionTrigger.BEFORE_DOCUMENT_SAVE); xhtml.endDocument(); } catch (TikaException e) { throw new IOExceptionWithCause("Unable to end a document", e); } catch (SAXException e) { throw new IOExceptionWithCause("Unable to end a document", e); } }