List of usage examples for org.apache.pdfbox.pdmodel PDDocument getPages
public PDPageTree getPages()
From source file:at.gv.egiz.pdfas.lib.impl.pdfbox2.placeholder.SignaturePlaceholderExtractor.java
License:EUPL
/** * Search the document for placeholder images and possibly included * additional info.<br/>/* w w w. jav a 2 s.co m*/ * Searches only for the first placeholder page after page from top. * * @param inputStream * @return all available info from the first found placeholder. * @throws PDFDocumentException * if the document could not be read. * @throws PlaceholderExtractionException * if STRICT matching mode was requested and no suitable * placeholder could be found. */ public static SignaturePlaceholderData extract(PDDocument doc, String placeholderId, int matchMode) throws PdfAsException { SignaturePlaceholderContext.setSignaturePlaceholderData(null); SignaturePlaceholderExtractor extractor; try { extractor = new SignaturePlaceholderExtractor(placeholderId, matchMode, doc); } catch (IOException | ClassNotFoundException | InstantiationException | IllegalAccessException e2) { throw new PDFIOException("error.pdf.io.04", e2); } int pageNr = 0; for (PDPage page : doc.getPages()) { pageNr++; try { extractor.setCurrentPage(pageNr); if (page.getContents() != null && page.getResources() != null && page.getContentStreams() != null) { extractor.processPage(page); //TODO: pdfbox2 - right? } SignaturePlaceholderData ret = matchPlaceholderPage(extractor.placeholders, placeholderId, matchMode); if (ret != null) { SignaturePlaceholderContext.setSignaturePlaceholderData(ret); return ret; } } catch (IOException e1) { throw new PDFIOException("error.pdf.io.04", e1); } catch (Throwable e) { throw new PDFIOException("error.pdf.io.04", e); } } if (extractor.placeholders.size() > 0) { SignaturePlaceholderData ret = matchPlaceholderDocument(extractor.placeholders, placeholderId, matchMode); SignaturePlaceholderContext.setSignaturePlaceholderData(ret); return ret; } // no placeholders found, apply strict mode if set if (matchMode == PLACEHOLDER_MATCH_MODE_STRICT) { throw new PlaceholderExtractionException("error.pdf.stamp.09"); } return null; }
From source file:at.gv.egiz.pdfas.lib.impl.signing.pdfbox2.PADESPDFBOXSigner.java
License:EUPL
public void signPDF(PDFObject genericPdfObject, RequestedSignature requestedSignature, PDFASSignatureInterface genericSigner) throws PdfAsException { //String fisTmpFile = null; PDFAsVisualSignatureProperties properties = null; if (!(genericPdfObject instanceof PDFBOXObject)) { // tODO://from w w w .j a va 2 s. c o m throw new PdfAsException(); } PDFBOXObject pdfObject = (PDFBOXObject) genericPdfObject; if (!(genericSigner instanceof PDFASPDFBOXSignatureInterface)) { // tODO: throw new PdfAsException(); } PDFASPDFBOXSignatureInterface signer = (PDFASPDFBOXSignatureInterface) genericSigner; String pdfaVersion = null; PDDocument doc = null; SignatureOptions options = new SignatureOptions(); COSDocument visualSignatureDocumentGuard = null; try { doc = pdfObject.getDocument(); SignaturePlaceholderData signaturePlaceholderData = PlaceholderFilter .checkPlaceholderSignature(pdfObject.getStatus(), pdfObject.getStatus().getSettings()); TablePos tablePos = null; if (signaturePlaceholderData != null) { // Placeholder found! 
logger.info("Placeholder data found."); if (signaturePlaceholderData.getProfile() != null) { logger.debug("Placeholder Profile set to: " + signaturePlaceholderData.getProfile()); requestedSignature.setSignatureProfileID(signaturePlaceholderData.getProfile()); } tablePos = signaturePlaceholderData.getTablePos(); if (tablePos != null) { SignatureProfileConfiguration signatureProfileConfiguration = pdfObject.getStatus() .getSignatureProfileConfiguration(requestedSignature.getSignatureProfileID()); float minWidth = signatureProfileConfiguration.getMinWidth(); if (minWidth > 0) { if (tablePos.getWidth() < minWidth) { tablePos.width = minWidth; logger.debug("Correcting placeholder with to minimum width {}", minWidth); } } logger.debug("Placeholder Position set to: " + tablePos.toString()); } } PDSignature signature = new PDSignature(); signature.setFilter(COSName.getPDFName(signer.getPDFFilter())); // default // filter signature.setSubFilter(COSName.getPDFName(signer.getPDFSubFilter())); SignatureProfileSettings signatureProfileSettings = TableFactory .createProfile(requestedSignature.getSignatureProfileID(), pdfObject.getStatus().getSettings()); /* * Check if input document is PDF-A conform * if (signatureProfileSettings.isPDFA()) { // TODO: run preflight parser runPDFAPreflight(pdfObject.getOriginalDocument()); } */ ValueResolver resolver = new ValueResolver(requestedSignature, pdfObject.getStatus()); String signerName = resolver.resolve("SIG_SUBJECT", signatureProfileSettings.getValue("SIG_SUBJECT"), signatureProfileSettings); signature.setName(signerName); // take signing time from provided signer... 
signature.setSignDate(signer.getSigningDate()); // ...and update operation status in order to use exactly this date for the complete signing process requestedSignature.getStatus().setSigningDate(signer.getSigningDate()); String signerReason = signatureProfileSettings.getSigningReason(); if (signerReason == null) { signerReason = "PAdES Signature"; } signature.setReason(signerReason); logger.debug("Signing reason: " + signerReason); logger.debug("Signing @ " + signer.getSigningDate().getTime().toString()); // the signing date, needed for valid signature // signature.setSignDate(signer.getSigningDate()); signer.setPDSignature(signature); int signatureSize = 0x1000; try { String reservedSignatureSizeString = signatureProfileSettings.getValue(SIG_RESERVED_SIZE); if (reservedSignatureSizeString != null) { signatureSize = Integer.parseInt(reservedSignatureSizeString); } logger.debug("Reserving {} bytes for signature", signatureSize); } catch (NumberFormatException e) { logger.warn("Invalid configuration value: {} should be a number using 0x1000", SIG_RESERVED_SIZE); } options.setPreferredSignatureSize(signatureSize); if (signatureProfileSettings.isPDFA() || signatureProfileSettings.isPDFA3()) { pdfaVersion = getPDFAVersion(doc); signatureProfileSettings.setPDFAVersion(pdfaVersion); } // Is visible Signature if (requestedSignature.isVisual()) { logger.debug("Creating visual signature block"); SignatureProfileConfiguration signatureProfileConfiguration = pdfObject.getStatus() .getSignatureProfileConfiguration(requestedSignature.getSignatureProfileID()); if (tablePos == null) { // ================================================================ // PositioningStage (visual) -> find position or use // fixed // position String posString = pdfObject.getStatus().getSignParamter().getSignaturePosition(); TablePos signaturePos = null; String signaturePosString = signatureProfileConfiguration.getDefaultPositioning(); if (signaturePosString != null) { logger.debug("using signature 
Positioning: " + signaturePos); signaturePos = new TablePos(signaturePosString); } logger.debug("using Positioning: " + posString); if (posString != null) { // Merge Signature Position tablePos = new TablePos(posString, signaturePos); } else { // Fallback to signature Position! tablePos = signaturePos; } if (tablePos == null) { // Last Fallback default position tablePos = new TablePos(); } } //Legacy Modes not supported with pdfbox2 anymore // boolean legacy32Position = signatureProfileConfiguration.getLegacy32Positioning(); // boolean legacy40Position = signatureProfileConfiguration.getLegacy40Positioning(); // create Table describtion Table main = TableFactory.createSigTable(signatureProfileSettings, MAIN, pdfObject.getStatus(), requestedSignature); IPDFStamper stamper = StamperFactory.createDefaultStamper(pdfObject.getStatus().getSettings()); IPDFVisualObject visualObject = stamper.createVisualPDFObject(pdfObject, main); /* * PDDocument originalDocument = PDDocument .load(new * ByteArrayInputStream(pdfObject.getStatus() * .getPdfObject().getOriginalDocument())); */ PositioningInstruction positioningInstruction = Positioning.determineTablePositioning(tablePos, "", doc, visualObject, pdfObject.getStatus().getSettings()); logger.debug("Positioning: {}", positioningInstruction.toString()); if (positioningInstruction.isMakeNewPage()) { int last = doc.getNumberOfPages() - 1; PDDocumentCatalog root = doc.getDocumentCatalog(); PDPage lastPage = root.getPages().get(last); root.getPages().getCOSObject().setNeedToBeUpdated(true); PDPage p = new PDPage(lastPage.getMediaBox()); p.setResources(new PDResources()); p.setRotation(lastPage.getRotation()); doc.addPage(p); } // handle rotated page int targetPageNumber = positioningInstruction.getPage(); logger.debug("Target Page: " + targetPageNumber); PDPage targetPage = doc.getPages().get(targetPageNumber - 1); int rot = targetPage.getRotation(); logger.debug("Page rotation: " + rot); // 
positioningInstruction.setRotation(positioningInstruction.getRotation() // + rot); logger.debug("resulting Sign rotation: " + positioningInstruction.getRotation()); SignaturePositionImpl position = new SignaturePositionImpl(); position.setX(positioningInstruction.getX()); position.setY(positioningInstruction.getY()); position.setPage(positioningInstruction.getPage()); position.setHeight(visualObject.getHeight()); position.setWidth(visualObject.getWidth()); requestedSignature.setSignaturePosition(position); properties = new PDFAsVisualSignatureProperties(pdfObject.getStatus().getSettings(), pdfObject, (PdfBoxVisualObject) visualObject, positioningInstruction, signatureProfileSettings); properties.buildSignature(); /* * ByteArrayOutputStream sigbos = new * ByteArrayOutputStream(); * sigbos.write(StreamUtils.inputStreamToByteArray * (properties .getVisibleSignature())); sigbos.close(); */ if (signaturePlaceholderData != null) { // Placeholder found! // replace placeholder URL fileUrl = PADESPDFBOXSigner.class.getResource("/placeholder/empty.jpg"); PDImageXObject img = PDImageXObject.createFromFile(fileUrl.getPath(), doc); img.getCOSObject().setNeedToBeUpdated(true); // PDDocumentCatalog root = doc.getDocumentCatalog(); // PDPageNode rootPages = root.getPages(); // List<PDPage> kids = new ArrayList<PDPage>(); // rootPages.getAllKids(kids); int pageNumber = positioningInstruction.getPage(); PDPage page = doc.getPages().get(pageNumber - 1); logger.info("Placeholder name: " + signaturePlaceholderData.getPlaceholderName()); COSDictionary xobjectsDictionary = (COSDictionary) page.getResources().getCOSObject() .getDictionaryObject(COSName.XOBJECT); xobjectsDictionary.setItem(signaturePlaceholderData.getPlaceholderName(), img); xobjectsDictionary.setNeedToBeUpdated(true); page.getResources().getCOSObject().setNeedToBeUpdated(true); logger.info("Placeholder name: " + signaturePlaceholderData.getPlaceholderName()); } if (signatureProfileSettings.isPDFA() || 
signatureProfileSettings.isPDFA3()) { PDDocumentCatalog root = doc.getDocumentCatalog(); COSBase base = root.getCOSObject().getItem(COSName.OUTPUT_INTENTS); if (base == null) { InputStream colorProfile = null; try { colorProfile = PDDocumentCatalog.class .getResourceAsStream("/icm/sRGB Color Space Profile.icm"); try { PDOutputIntent oi = new PDOutputIntent(doc, colorProfile); oi.setInfo("sRGB IEC61966-2.1"); oi.setOutputCondition("sRGB IEC61966-2.1"); oi.setOutputConditionIdentifier("sRGB IEC61966-2.1"); oi.setRegistryName("http://www.color.org"); root.addOutputIntent(oi); root.getCOSObject().setNeedToBeUpdated(true); logger.info("added Output Intent"); } catch (Throwable e) { e.printStackTrace(); throw new PdfAsException("Failed to add Output Intent", e); } } finally { IOUtils.closeQuietly(colorProfile); } } } options.setPage(positioningInstruction.getPage()); options.setVisualSignature(properties.getVisibleSignature()); } visualSignatureDocumentGuard = options.getVisualSignature(); doc.addSignature(signature, signer, options); String sigFieldName = signatureProfileSettings.getSignFieldValue(); if (sigFieldName == null) { sigFieldName = "PDF-AS Signatur"; } int count = PdfBoxUtils.countSignatures(doc, sigFieldName); sigFieldName = sigFieldName + count; PDAcroForm acroFormm = doc.getDocumentCatalog().getAcroForm(); // PDStructureTreeRoot pdstRoot = // doc.getDocumentCatalog().getStructureTreeRoot(); // COSDictionary dic = // doc.getDocumentCatalog().getCOSDictionary(); // PDStructureElement el = new PDStructureElement("Widget", // pdstRoot); PDSignatureField signatureField = null; if (acroFormm != null) { @SuppressWarnings("unchecked") List<PDField> fields = acroFormm.getFields(); if (fields != null) { for (PDField pdField : fields) { if (pdField != null) { if (pdField instanceof PDSignatureField) { PDSignatureField tmpSigField = (PDSignatureField) pdField; if (tmpSigField.getSignature() != null && tmpSigField.getSignature().getCOSObject() != null) { if 
(tmpSigField.getSignature().getCOSObject() .equals(signature.getCOSObject())) { signatureField = (PDSignatureField) pdField; } } } } } } else { logger.warn("Failed to name Signature Field! [Cannot find Field list in acroForm!]"); } if (signatureField != null) { signatureField.setPartialName(sigFieldName); } if (properties != null) { signatureField.setAlternateFieldName(properties.getAlternativeTableCaption()); } else { signatureField.setAlternateFieldName(sigFieldName); } } else { logger.warn("Failed to name Signature Field! [Cannot find acroForm!]"); } // PDF-UA logger.info("Adding pdf/ua content."); try { PDDocumentCatalog root = doc.getDocumentCatalog(); PDStructureTreeRoot structureTreeRoot = root.getStructureTreeRoot(); if (structureTreeRoot != null) { logger.info("Tree Root: {}", structureTreeRoot.toString()); List<Object> kids = structureTreeRoot.getKids(); if (kids == null) { logger.info("No kid-elements in structure tree Root, maybe not PDF/UA document"); } PDStructureElement docElement = null; for (Object k : kids) { if (k instanceof PDStructureElement) { docElement = (PDStructureElement) k; break; } } PDStructureElement sigBlock = new PDStructureElement("Form", docElement); // create object dictionary and add as child element COSDictionary objectDic = new COSDictionary(); objectDic.setName("Type", "OBJR"); objectDic.setItem("Pg", signatureField.getWidget().getPage()); objectDic.setItem("Obj", signatureField.getWidget()); List<Object> l = new ArrayList<Object>(); l.add(objectDic); sigBlock.setKids(l); sigBlock.setPage(signatureField.getWidget().getPage()); sigBlock.setTitle("Signature Table"); sigBlock.setParent(docElement); docElement.appendKid(sigBlock); // Create and add Attribute dictionary to mitigate PAC // warning COSDictionary sigBlockDic = (COSDictionary) sigBlock.getCOSObject(); COSDictionary sub = new COSDictionary(); sub.setName("O", "Layout"); sub.setName("Placement", "Block"); sigBlockDic.setItem(COSName.A, sub); 
sigBlockDic.setNeedToBeUpdated(true); // Modify number tree PDNumberTreeNode ntn = structureTreeRoot.getParentTree(); int parentTreeNextKey = structureTreeRoot.getParentTreeNextKey(); if (ntn == null) { ntn = new PDNumberTreeNode(objectDic, null); logger.info("No number-tree-node found!"); } COSArray ntnKids = (COSArray) ntn.getCOSObject().getDictionaryObject(COSName.KIDS); COSArray ntnNumbers = (COSArray) ntn.getCOSObject().getDictionaryObject(COSName.NUMS); if (ntnNumbers == null && ntnKids != null) {//no number array, so continue with the kids array //create dictionary with limits and nums array COSDictionary pTreeEntry = new COSDictionary(); COSArray limitsArray = new COSArray(); //limits for exact one entry limitsArray.add(COSInteger.get(parentTreeNextKey)); limitsArray.add(COSInteger.get(parentTreeNextKey)); COSArray numsArray = new COSArray(); numsArray.add(COSInteger.get(parentTreeNextKey)); numsArray.add(sigBlock); pTreeEntry.setItem(COSName.NUMS, numsArray); pTreeEntry.setItem(COSName.LIMITS, limitsArray); PDNumberTreeNode newKidsElement = new PDNumberTreeNode(pTreeEntry, PDNumberTreeNode.class); ntnKids.add(newKidsElement); ntnKids.setNeedToBeUpdated(true); } else if (ntnNumbers != null && ntnKids == null) { int arrindex = ntnNumbers.size(); ntnNumbers.add(arrindex, COSInteger.get(parentTreeNextKey)); ntnNumbers.add(arrindex + 1, sigBlock.getCOSObject()); ntnNumbers.setNeedToBeUpdated(true); structureTreeRoot.setParentTree(ntn); } else if (ntnNumbers == null && ntnKids == null) { //document is not pdfua conform before signature creation throw new PdfAsException("error.pdf.sig.pdfua.1"); } else { //this is not allowed throw new PdfAsException("error.pdf.sig.pdfua.1"); } // set StructureParent for signature field annotation signatureField.getWidget().setStructParent(parentTreeNextKey); //Increase the next Key value in the structure tree root structureTreeRoot.setParentTreeNextKey(parentTreeNextKey + 1); // add the Tabs /S Element for Tabbing through annots 
PDPage p = signatureField.getWidget().getPage(); p.getCOSObject().setName("Tabs", "S"); p.getCOSObject().setNeedToBeUpdated(true); //check alternative signature field name if (signatureField != null) { if (signatureField.getAlternateFieldName().equals("")) signatureField.setAlternateFieldName(sigFieldName); } ntn.getCOSObject().setNeedToBeUpdated(true); sigBlock.getCOSObject().setNeedToBeUpdated(true); structureTreeRoot.getCOSObject().setNeedToBeUpdated(true); objectDic.setNeedToBeUpdated(true); docElement.getCOSObject().setNeedToBeUpdated(true); } } catch (Throwable e) { if (signatureProfileSettings.isPDFUA() == true) { logger.error("Could not create PDF-UA conform document!"); throw new PdfAsException("error.pdf.sig.pdfua.1", e); } else { logger.info("Could not create PDF-UA conform signature"); } } try { ByteArrayOutputStream bos = new ByteArrayOutputStream(); synchronized (doc) { doc.saveIncremental(bos); byte[] outputDocument = bos.toByteArray(); /* Check if resulting pdf is PDF-A conform */ //if (signatureProfileSettings.isPDFA()) { // // TODO: run preflight parser // runPDFAPreflight(outputDocument); //} pdfObject.setSignedDocument(outputDocument); } } finally { if (options != null) { if (options.getVisualSignature() != null) { options.getVisualSignature().close(); } } } System.gc(); } catch (IOException e) { logger.warn(MessageResolver.resolveMessage("error.pdf.sig.01"), e); throw new PdfAsException("error.pdf.sig.01", e); } finally { if (doc != null) { try { doc.close(); } catch (IOException e) { logger.debug("Failed to close COS Doc!", e); // Ignore } } logger.debug("Signature done!"); } }
From source file:com.ackpdfbox.app.PDFToImage.java
License:Apache License
/**
 * Gives every page of the document a new crop box built from the supplied coordinates and
 * prints "resizing page" to standard output once per page.
 *
 * @param document the document whose pages are changed
 * @param a        x coordinate of the lower-left corner
 * @param b        y coordinate of the lower-left corner
 * @param c        x coordinate of the upper-right corner
 * @param d        y coordinate of the upper-right corner
 */
private static void changeCropBox(PDDocument document, float a, float b, float c, float d) {
    for (final PDPage currentPage : document.getPages()) {
        System.out.println("resizing page");

        // Each page receives its own rectangle instance.
        final PDRectangle cropBox = new PDRectangle();
        cropBox.setUpperRightX(c);
        cropBox.setUpperRightY(d);
        cropBox.setLowerLeftX(a);
        cropBox.setLowerLeftY(b);

        currentPage.setCropBox(cropBox);
    }
}
From source file:com.amolik.misc.ExtractTextByArea.java
License:Apache License
/** * This will print the documents text in a certain area. * * @param args The command line arguments. * * @throws IOException If there is an error parsing the document. *//*from w ww . j av a 2 s . com*/ public static void main(String[] args) throws IOException { //args[0]= "E:\\Automation\\uphillit\\Fiscal_demo_data.pdf"; // if( args.length != 1 ) // { // usage(); // } // else // { PDDocument document = null; try { document = PDDocument.load(new File("E:\\Automation\\uphillit\\Fiscal_demo_data.pdf")); int numberOfPages = document.getNumberOfPages(); if (numberOfPages > 0) { PDPage page = (PDPage) document.getPages().get(0); System.out.println(page.getContents()); } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition(true); Rectangle rect = new Rectangle(3, 1, 600, 6000); stripper.addRegion("class1", rect); PDPage firstPage = document.getPage(0); stripper.extractRegions(firstPage); System.out.println("Text in the area:" + rect); System.out.println(stripper.getTextForRegion("class1")); } finally { if (document != null) { document.close(); } } // } }
From source file:com.fileOperations.StampPDF.java
/** * This stamps docketed files./*from w w w. j av a 2 s . com*/ * * @param file String (full file path) * @param docketTime Timestamp * @param dept */ public static void stampDocument(String file, Timestamp docketTime, String dept) { // the document PDDocument doc = null; try { PDFont stampFont = PDType1Font.TIMES_ROMAN; float stampFontSize = 14; String title = PDFBoxTools.HeaderTimeStamp(docketTime) + " " + dept; float titleWidth = stampFont.getStringWidth(title) / 1000 * stampFontSize; float titleHeight = stampFont.getFontDescriptor().getFontBoundingBox().getHeight() / 1000 * stampFontSize; int marginTop = 20; doc = PDDocument.load(new File(file)); if (!doc.isEncrypted()) { for (int i = 0; i < doc.getPages().getCount(); i++) { PDPageContentStream contentStream = null; PDPage page = (PDPage) doc.getPages().get(i); contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true, true); page.getResources().getFontNames(); contentStream.beginText(); contentStream.setFont(stampFont, stampFontSize); contentStream.setNonStrokingColor(Color.RED); contentStream.newLineAtOffset((page.getMediaBox().getWidth() - titleWidth) / 2, page.getMediaBox().getHeight() - marginTop - titleHeight); contentStream.showText(title); contentStream.endText(); contentStream.close(); } doc.save(file); } } catch (IOException ex) { ExceptionHandler.Handle(ex); } finally { if (doc != null) { try { doc.close(); } catch (IOException ex) { ExceptionHandler.Handle(ex); } } } }
From source file:com.formkiq.core.service.conversion.PdfToPngFormatConverter.java
License:Apache License
/** * Find {@link PDSignatureField} on the Image. * @param doc {@link PDDocument}/*from w w w. j a v a 2 s. c o m*/ * @param result {@link ConversionResult} * @return {@link List} of {@link ConversionField} * @throws IOException IOException */ private List<ConversionField> findSigningButtons(final PDDocument doc, final ConversionResult result) throws IOException { List<ConversionField> fields = new ArrayList<>(); List<PDSignatureField> sigs = doc.getSignatureFields(); for (PDSignatureField s : sigs) { PDRectangle rect = PDRectangleUtil.calculateWidget(s.getWidgets()); PDAnnotationWidget widget = s.getWidgets().get(0); PDPage page = widget.getPage(); int pageNumber = doc.getPages().indexOf(page); float imagePageSize = result.getDataheight() / doc.getNumberOfPages(); float x = rect.getLowerLeftX(); float y = (imagePageSize - rect.getUpperRightY()) + (imagePageSize * pageNumber); ConversionField f = new ConversionField(); f.setDocumentfieldname(s.getFullyQualifiedName()); f.setX(x); f.setY(y); f.setHeight(rect.getHeight()); fields.add(f); } return fields; }
From source file:com.formkiq.core.service.generator.pdfbox.PdfEditorServiceImpl.java
License:Apache License
/**
 * Maps the {@link COSDictionary} of every page annotation to the zero-based index of the
 * page it was found on. If the same dictionary is reported for several pages, the entry
 * of the later page replaces the earlier one.
 *
 * @param doc {@link PDDocument}
 * @return {@link Map} of {@link COSDictionary} to {@link Integer}
 * @throws IOException IOException
 */
private Map<COSDictionary, Integer> getCOSDictionaryToPageNumberMap(final PDDocument doc)
        throws IOException {

    final Map<COSDictionary, Integer> pageIndexByAnnotation = new HashMap<>();
    final PDPageTree pageTree = doc.getPages();

    int pageIndex = 0;
    while (pageIndex < pageTree.getCount()) {
        final Integer boxedIndex = Integer.valueOf(pageIndex);
        for (PDAnnotation annotation : pageTree.get(pageIndex).getAnnotations()) {
            pageIndexByAnnotation.put(annotation.getCOSObject(), boxedIndex);
        }
        pageIndex++;
    }

    return pageIndexByAnnotation;
}
From source file:com.joowon.returnA.classifier.EbsBookCrawler.java
License:Open Source License
/**
 * Converts every file in the {@code book} classpath folder to a text file on the desktop.
 *
 * <p>For each book the full media box of every page is extracted as one region, the page
 * texts are concatenated in page order and written to a file named after the book with
 * {@code .pdf} replaced by {@code .txt}.
 *
 * @throws IOException if the book folder cannot be listed, or a book cannot be loaded,
 *                     read or closed
 */
public void run() throws IOException {
    File destination = new File("/Users/Joowon/Desktop");
    File bookFolder = new File(getClass().getClassLoader().getResource("book").getFile());

    // listFiles() yields null when the path is not a readable directory.
    File[] books = bookFolder.listFiles();
    if (books == null) {
        throw new IOException("Cannot list book folder: " + bookFolder);
    }

    for (File book : books) {
        String outputName = destination.getPath() + "/" + book.getName().replace(".pdf", ".txt");

        PDDocument document = PDDocument.load(book);
        try {
            // Accumulate in a builder; repeated String concatenation is quadratic.
            StringBuilder text = new StringBuilder();
            for (PDPage page : document.getPages()) {
                text.append(new PdfTextExtractor(page)
                        .addRegion(0, 0, (int) page.getMediaBox().getWidth(),
                                (int) page.getMediaBox().getHeight())
                        .extract());
            }
            new TxtWriter(outputName).write(text.toString());
        } finally {
            // Close the document even when extraction or writing fails.
            document.close();
        }
    }
}
From source file:com.testautomationguru.utility.PDFUtil.java
License:Apache License
/** * This method extracts all the embedded images of the pdf document *///from ww w . ja va2 s . c o m private List<String> extractimages(String file, int startPage, int endPage) { logger.info("file : " + file); logger.info("startPage : " + startPage); logger.info("endPage : " + endPage); ArrayList<String> imgNames = new ArrayList<String>(); boolean bImageFound = false; try { this.createImageDestinationDirectory(file); String fileName = this.getFileName(file).replace(".pdf", "_resource"); PDDocument document = PDDocument.load(new File(file)); PDPageTree list = document.getPages(); this.updateStartAndEndPages(file, startPage, endPage); int totalImages = 1; for (int iPage = this.startPage - 1; iPage < this.endPage; iPage++) { logger.info("Page No : " + (iPage + 1)); PDResources pdResources = list.get(iPage).getResources(); for (COSName c : pdResources.getXObjectNames()) { PDXObject o = pdResources.getXObject(c); if (o instanceof org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) { bImageFound = true; String fname = this.imageDestinationPath + "/" + fileName + "_" + totalImages + ".png"; ImageIO.write(((org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) o).getImage(), "png", new File(fname)); imgNames.add(fname); totalImages++; } } } document.close(); if (bImageFound) logger.info("Images are saved @ " + this.imageDestinationPath); else logger.info("No images were found in the PDF"); } catch (Exception e) { e.printStackTrace(); } return imgNames; }
From source file:ddf.catalog.transformer.input.pdf.GeoPdfParserImpl.java
License:Open Source License
/** * Generates a WKT compliant String from a PDF Document if it contains GeoPDF information. * Currently, only WGS84 Projections are supported (GEOGRAPHIC GeoPDF ProjectionType). * * @param pdfDocument - The PDF document * @return the WKT String/*w w w . ja v a 2 s.co m*/ * @throws IOException */ @Override public String apply(PDDocument pdfDocument) throws IOException { ToDoubleVisitor toDoubleVisitor = new ToDoubleVisitor(); LinkedList<String> polygons = new LinkedList<>(); for (PDPage pdPage : pdfDocument.getPages()) { COSDictionary cosObject = pdPage.getCOSObject(); COSBase lgiDictObject = cosObject.getObjectFromPath(LGIDICT); // Handle Multiple Map Frames if (lgiDictObject instanceof COSArray) { for (int i = 0; i < ((COSArray) lgiDictObject).size(); i++) { COSDictionary lgidict = (COSDictionary) cosObject.getObjectFromPath(LGIDICT + "/[" + i + "]"); COSDictionary projectionArray = (COSDictionary) lgidict.getDictionaryObject(PROJECTION); if (projectionArray != null) { String projectionType = ((COSString) projectionArray.getItem(PROJECTION_TYPE)).getString(); if (GEOGRAPHIC.equals(projectionType)) { COSArray neatlineArray = (COSArray) cosObject .getObjectFromPath(LGIDICT + "/[" + i + "]/" + NEATLINE); String wktString = getWktFromNeatLine(lgidict, neatlineArray, toDoubleVisitor); polygons.add(wktString); } else { LOGGER.debug("Unsupported projection type {}. Map Frame will be skipped.", projectionType); } } else { LOGGER.debug("No projection array found on the map frame. 
Map Frame will be skipped."); } } // Handle One Map Frame } else if (lgiDictObject instanceof COSDictionary) { COSDictionary lgidict = (COSDictionary) lgiDictObject; COSDictionary projectionArray = (COSDictionary) lgidict.getDictionaryObject(PROJECTION); if (projectionArray != null) { String projectionType = ((COSString) projectionArray.getItem(PROJECTION_TYPE)).getString(); if (GEOGRAPHIC.equals(projectionType)) { COSArray neatlineArray = (COSArray) cosObject.getObjectFromPath(LGIDICT + "/" + NEATLINE); if (neatlineArray == null) { neatlineArray = generateNeatLineFromPDFDimensions(pdPage); } polygons.add(getWktFromNeatLine(lgidict, neatlineArray, toDoubleVisitor)); } else { LOGGER.debug("Unsupported projection type {}. Map Frame will be skipped.", projectionType); } } else { LOGGER.debug("No projection array found on the map frame. Map Frame will be skipped."); } } } if (polygons.size() == 0) { LOGGER.debug( "No GeoPDF information found on PDF during transformation. Metacard location will not be set."); return null; } if (polygons.size() == 1) { return POLYGON + polygons.get(0) + "))"; } else { return polygons.stream().map(polygon -> "((" + polygon + "))") .collect(Collectors.joining(",", MULTIPOLYGON, ")")); } }