List of usage examples for org.apache.pdfbox.pdmodel PDDocument load
public static PDDocument load(byte[] input) throws IOException
From source file:org.apache.fop.render.pdf.StructureTreeMergerTestCase.java
License:Apache License
@Test public void testOBJRCorrectPosition() throws IOException { setUp();//from w w w .j a v a 2 s . com PDDocument doc = PDDocument.load(new File(getClass().getResource(MissingOBJR).getFile())); PDPage srcPage = doc.getPage(0); PageParentTreeFinder finder = new PageParentTreeFinder(srcPage); COSArray markedContentParents = finder.getPageParentTreeArray(doc); PDFStructElem elem = new PDFStructElem(); elem.setObjectNumber(2); adapter = new PDFBoxAdapter(pdfPage, new HashMap(), new HashMap<Integer, PDFArray>()); PDFLogicalStructureHandler handler = setUpPDFLogicalStructureHandler(); StructureTreeMerger merger = new StructureTreeMerger(elem, handler, adapter, srcPage); merger.copyStructure(markedContentParents); // PDFArray array = handler.getPageParentTree(); // PDFStructElem kid = (PDFStructElem)array.get(0); // PDFReference reference = (PDFReference) kid.get("P"); // PDFStructElem parent = (PDFStructElem)reference.getObject(); // List<PDFObject> kids = parent.getKids(); // PDFDictionary first = (PDFDictionary) kids.get(0); // Assert.assertEquals(first.get("Type").toString(), "/OBJR"); // PDFDictionary last = (PDFDictionary) kids.get(2); // Assert.assertEquals(last.get("Type").toString(), "/OBJR"); // PDFStructElem middle = (PDFStructElem) kids.get(1); // Assert.assertEquals(middle.get("Type").toString(), "/StructElem"); }
From source file:org.apache.fop.render.pdf.StructureTreeMergerTestCase.java
License:Apache License
@Test public void testCheckNullCOSObject() throws IOException { setUp();// ww w.java 2 s.c o m PDDocument doc = PDDocument.load(new File(getClass().getResource(BrokenLink).getFile())); PDPage srcPage = doc.getPage(0); PageParentTreeFinder finder = new PageParentTreeFinder(srcPage); COSArray markedContentParents = finder.getPageParentTreeArray(doc); COSObject nullObj = new COSObject(null); nullObj.setObjectNumber(100); nullObj.setGenerationNumber(0); PDFStructElem elem = new PDFStructElem(); elem.setObjectNumber(2); COSObject parent = (COSObject) markedContentParents.get(1); COSArray kids = (COSArray) parent.getDictionaryObject(COSName.K); COSDictionary kid = (COSDictionary) kids.get(1); kid.setItem(COSName.OBJ, nullObj); adapter = new PDFBoxAdapter(pdfPage, new HashMap(), new HashMap<Integer, PDFArray>()); PDFLogicalStructureHandler handler = setUpPDFLogicalStructureHandler(); StructureTreeMerger merger = new StructureTreeMerger(elem, handler, adapter, srcPage); merger.copyStructure(markedContentParents); PDFArray array = handler.getPageParentTree(); PDFStructElem parentElem = (PDFStructElem) array.get(1); PDFDictionary objrDict = (PDFDictionary) parentElem.getKids().get(1); Assert.assertNull(objrDict.get("Obj")); }
From source file:org.apache.fop.render.pdf.TaggedPDFConductorTestCase.java
License:Apache License
/**
 * Runs a {@link TaggedPDFConductor} over the first page of a test PDF.
 *
 * @param pdf  classpath resource name of the PDF to load
 * @param elem structure element handed to the conductor; its object number
 *             is set to 2 before use
 * @throws IOException if the PDF cannot be loaded or processed
 */
private void runConductor(String pdf, PDFStructElem elem) throws IOException {
    setUp();

    File pdfFile = new File(getClass().getResource(pdf).getFile());
    PDDocument sourceDocument = PDDocument.load(pdfFile);
    PDPage firstPage = sourceDocument.getPage(0);

    elem.setObjectNumber(2);

    PDFBoxAdapter boxAdapter =
            new PDFBoxAdapter(pdfPage, new HashMap(), new HashMap<Integer, PDFArray>());
    PDFLogicalStructureHandler structureHandler = setUpPDFLogicalStructureHandler();

    TaggedPDFConductor conductor =
            new TaggedPDFConductor(elem, structureHandler, firstPage, boxAdapter);
    conductor.handleLogicalStructure(sourceDocument);
}
From source file:org.apache.james.mailbox.store.search.PDFTextExtractor.java
License:Apache License
/**
 * Extracts the plain text of a PDF read from the given stream.
 *
 * <p>The parsed {@link PDDocument} is closed before returning so that the
 * resources it holds are released; the extracted text is a plain
 * {@code String} and stays valid afterwards.
 *
 * @param inputStream stream positioned at the start of the PDF data
 * @return the parsed content holding the extracted text (empty if the stripper
 *         returned {@code null}) and an empty metadata map
 * @throws IOException if the PDF cannot be parsed or its text cannot be extracted
 */
private ParsedContent extractTextFromPDF(InputStream inputStream) throws IOException {
    try (PDDocument document = PDDocument.load(inputStream)) {
        String text = new PDFTextStripper().getText(document);
        return new ParsedContent(Optional.ofNullable(text), ImmutableMap.of());
    }
}
From source file:org.apache.padaf.preflight.ExtractStream.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 3) { System.err.println("usage : ExtractStream file objNum objGen"); }//from w w w.j av a 2 s .c om PDDocument document = PDDocument.load(new FileInputStream(args[0])); COSObject obj = document.getDocument() .getObjectFromPool(new COSObjectKey(Integer.parseInt(args[1]), Integer.parseInt(args[2]))); if (obj.getObject() instanceof COSStream) { COSStream stream = (COSStream) obj.getObject(); InputStream is = stream.getUnfilteredStream(); FileOutputStream out = new FileOutputStream("stream.out"); IOUtils.copyLarge(is, out); IOUtils.closeQuietly(out); } }
From source file:org.apache.padaf.preflight.PdfA1bValidator.java
License:Apache License
public synchronized ValidationResult validate(DataSource source) throws ValidationException { DocumentHandler handler = createDocumentHandler(source); try {/*from ww w . j a v a 2 s . com*/ ArrayList<ValidationError> allErrors = new ArrayList<ValidationError>(); // syntax (javacc) validation try { InputStreamReader reader = new InputStreamReader(source.getInputStream(), encoding); PDFParser parser = new PDFParser(reader); handler.setParser(parser); parser.PDF(); } catch (IOException e) { throw new ValidationException("Failed to parse datasource due to : " + e.getMessage(), e); } catch (ParseException e) { allErrors.addAll(createErrorResult(e).getErrorsList()); } // if here is reached, validate with helpers // init PDF Box document PDDocument document = null; try { document = PDDocument.load(handler.getSource().getInputStream()); handler.setDocument(document); } catch (IOException e) { throw new ValidationException("PDFBox failed to parse datasource", e); } // init PDF Extractor try { SimpleCharStream scs = new SimpleCharStream(source.getInputStream()); ExtractorTokenManager extractor = new ExtractorTokenManager(scs); extractor.parse(); handler.setPdfExtractor(extractor); } catch (IOException e) { throw new ValidationException("PDF ExtractorTokenMng failed to parse datasource", e); } /* * call all helpers */ // Execute priority helpers. for (AbstractValidationHelper helper : priorHelpers) { runValidation(handler, helper, allErrors); } // Execute other helpers. 
for (AbstractValidationHelper helper : standHelpers) { runValidation(handler, helper, allErrors); } // check result ValidationResult valRes = null; if (allErrors.size() == 0) { valRes = new ValidationResult(true); } else { // there are some errors valRes = new ValidationResult(allErrors); } // addition of the some objects to avoid a second file parsing valRes.setPdf(document); valRes.setXmpMetaData(handler.getMetadata()); return valRes; } catch (ValidationException e) { // ---- Close all open resources if an error occurs. handler.close(); throw e; } }
From source file:org.apache.padaf.preflight.RetrieveMissingStream.java
License:Apache License
/**
 * Command-line entry point: loads a PDF, records the key of every object
 * whose content is a stream, then walks all pages of the document.
 *
 * @param args exactly one value: the path of the PDF file to inspect
 * @throws Exception if the file cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage : RetrieveMissingStream file");
        System.exit(233);
    }

    HashSet<COSObjectKey> listOfKeys = new HashSet<COSObjectKey>();
    FileInputStream pdfInput = new FileInputStream(args[0]);
    try {
        PDDocument document = PDDocument.load(pdfInput);
        try {
            // Collect the key of every object that holds a stream.
            List<COSObject> lCosObj = document.getDocument().getObjects();
            for (COSObject cosObject : lCosObj) {
                if (cosObject.getObject() instanceof COSStream) {
                    listOfKeys.add(new COSObjectKey(cosObject.getObjectNumber().intValue(),
                            cosObject.getGenerationNumber().intValue()));
                }
            }

            PDDocumentCatalog catalog = document.getDocumentCatalog();
            List<?> pages = catalog.getAllPages();
            for (Object page : pages) {
                PDPage pdp = (PDPage) page;
                // NOTE(review): neither value below is used, and listOfKeys is never
                // read; the loop only prints one blank line per page. The tool looks
                // unfinished - confirm the intended comparison before extending it.
                PDStream pdStream = pdp.getContents();
                COSBase b = pdp.getCOSDictionary().getItem(COSName.getPDFName("Contents"));
                System.out.println();
            }
        } finally {
            // Release the parsed document even when inspection fails.
            document.close();
        }
    } finally {
        pdfInput.close();
    }
}
From source file:org.apache.pdflens.Main.java
License:Apache License
/**
 * Loads a PDF document from a stream and, when it is encrypted, tries to
 * decrypt it with the empty password.
 *
 * <p>A failed decryption is reported (an error line on {@code System.err} for
 * a wrong password, a stack trace for a cryptography failure) and the document
 * is returned as loaded.
 *
 * @param input The input stream for the document.
 *
 * @return The document.
 *
 * @throws IOException If there is an error parsing the document.
 */
private static PDDocument parseDocument(InputStream input) throws IOException {
    PDDocument document = PDDocument.load(input);
    if (!document.isEncrypted()) {
        return document;
    }

    try {
        document.decrypt("");
    } catch (InvalidPasswordException e) {
        System.err.println("Error: The document is encrypted.");
    } catch (org.apache.pdfbox.exceptions.CryptographyException e) {
        e.printStackTrace();
    }
    return document;
}
From source file:org.apache.syncope.client.console.wicket.markup.html.form.preview.BinaryPDFPreviewer.java
License:Apache License
@Override public Component preview(final byte[] uploadedBytes) { firstPage = null;// w w w . j a va 2 s . c om PDDocument document = null; try { document = PDDocument.load(new ByteArrayInputStream(uploadedBytes)); if (document.isEncrypted()) { LOG.info("Document is encrypted, no preview is possible"); } else { firstPage = new PDFRenderer(document).renderImage(0, RESOLUTION, IMAGE_TYPE); } } catch (IOException e) { LOG.error("While generating thumbnail from first page", e); } finally { IOUtils.closeQuietly(document); } Fragment fragment; if (firstPage == null) { fragment = new Fragment("preview", "noPreviewFragment", this); } else { fragment = new Fragment("preview", "previewFragment", this); fragment.add(new NonCachingImage("previewImage", new ThumbnailImageResource(firstPage))); } WebMarkupContainer previewContainer = new WebMarkupContainer("previewContainer"); previewContainer.setOutputMarkupId(true); previewContainer.add(fragment); return this.addOrReplace(previewContainer); }
From source file:org.argrr.extractor.gdrive.downloader.ChartsDownloader.java
License:Open Source License
public static void extractPictures(String path, String fileName) throws IOException { PDDocument document = null;// w w w . j a va 2 s. c o m try { document = PDDocument.load(path + "/" + fileName + ".pdf"); } catch (IOException ex) { System.out.println("" + ex); } List pages = document.getDocumentCatalog().getAllPages(); Iterator iter = pages.iterator(); int i = 1; String name = null; while (iter.hasNext()) { PDPage page = (PDPage) iter.next(); PDResources resources = page.getResources(); Map pageImages = resources.getImages(); if (pageImages != null) { Iterator imageIter = pageImages.keySet().iterator(); while (imageIter.hasNext()) { String key = (String) imageIter.next(); PDXObjectImage image = (PDXObjectImage) pageImages.get(key); image.write2file(ChartsDownloader.rootOutputPathCharts + "/" + fileName + "-" + i); i++; } } } }