List of usage examples for org.apache.pdfbox.pdmodel.PDDocument.getNumberOfPages
public int getNumberOfPages()
From source file:com.fcore.base.fileSystem.utils.FileUtil.java
/** * PDF to Image(png)/*from w ww . ja v a 2s .com*/ * @param pdfPath * @param imagePath */ public static void pdf2png(String pdfPath, String imagePath) { long old = System.currentTimeMillis(); // pdf ?? File file = new File(pdfPath); try { PDDocument doc = PDDocument.load(file); PDFRenderer renderer = new PDFRenderer(doc); int pageCount = doc.getNumberOfPages(); for (int i = 0; i < pageCount; i++) { BufferedImage image = renderer.renderImageWithDPI(i, 250); // Windows //BufferedImage srcImage = resize(image, 240, 240);// ImageIO.write(image, "PNG", new File(imagePath + i + ".png")); } } catch (IOException e) { e.printStackTrace(); } long now = System.currentTimeMillis(); System.out.println("" + ((now - old) / 1000.0) + "\n\n" + "?:" + imagePath); }
From source file:com.formkiq.core.service.conversion.PdfToPngFormatConverter.java
License:Apache License
@Override public ConversionResult convert(final Object data, final WorkflowOutputDocumentType inputType, final WorkflowOutputDocumentType outputType) throws IOException { PDDocument doc = (PDDocument) data; PDFRenderer pdfRenderer = new PDFRenderer(doc); BufferedImage[] images = new BufferedImage[doc.getNumberOfPages()]; for (int page = 0; page < doc.getNumberOfPages(); ++page) { BufferedImage bim = pdfRenderer.renderImage(page, SCALE); images[page] = bim;// ww w . j a v a2s.com } ConversionResult result = merge(images); List<ConversionField> fields = findSigningButtons(doc, result); result.setFields(fields); return result; }
From source file:com.formkiq.core.service.conversion.PdfToPngFormatConverter.java
License:Apache License
/**
 * Find {@link PDSignatureField} on the Image.
 *
 * <p>For each signature field, a {@link ConversionField} is built from the rectangle
 * returned by {@code PDRectangleUtil.calculateWidget} and the index of the page that
 * owns the field's first widget.
 *
 * @param doc {@link PDDocument}
 * @param result {@link ConversionResult}
 * @return {@link List} of {@link ConversionField}
 * @throws IOException IOException
 */
private List<ConversionField> findSigningButtons(final PDDocument doc, final ConversionResult result)
        throws IOException {

    final List<ConversionField> found = new ArrayList<>();

    for (final PDSignatureField signature : doc.getSignatureFields()) {
        final PDRectangle bounds = PDRectangleUtil.calculateWidget(signature.getWidgets());
        final PDPage ownerPage = signature.getWidgets().get(0).getPage();
        final int pageIndex = doc.getPages().indexOf(ownerPage);

        // NOTE(review): this presumably treats the image as all pages stacked
        // vertically with equal heights - confirm against merge().
        final float pageHeight = result.getDataheight() / doc.getNumberOfPages();
        final float top = (pageHeight - bounds.getUpperRightY()) + (pageHeight * pageIndex);

        final ConversionField field = new ConversionField();
        field.setDocumentfieldname(signature.getFullyQualifiedName());
        field.setX(bounds.getLowerLeftX());
        field.setY(top);
        field.setHeight(bounds.getHeight());
        found.add(field);
    }

    return found;
}
From source file:com.jaeksoft.searchlib.parser.PdfParser.java
License:Open Source License
/**
 * Copies the PDF's document-level metadata into the parser result.
 *
 * <p>Title, subject, author, producer and keywords are added when the document
 * information is present; the creation and modification dates are added only when
 * {@code getDate} yields a non-null value. The page count is always added, and the
 * language is added when the document has a catalog.
 *
 * @param result the parser result receiving the fields
 * @param pdf    the document to read the metadata from
 * @throws IOException declared by the original signature
 */
private void extractMetaData(ParserResultItem result, PDDocument pdf) throws IOException {
    final PDDocumentInformation docInfo = pdf.getDocumentInformation();
    if (docInfo != null) {
        result.addField(ParserFieldEnum.title, docInfo.getTitle());
        result.addField(ParserFieldEnum.subject, docInfo.getSubject());
        result.addField(ParserFieldEnum.author, docInfo.getAuthor());
        result.addField(ParserFieldEnum.producer, docInfo.getProducer());
        result.addField(ParserFieldEnum.keywords, docInfo.getKeywords());

        final String created = getDate(getCreationDate(docInfo));
        if (created != null) {
            result.addField(ParserFieldEnum.creation_date, created);
        }

        final String modified = getDate(getModificationDate(docInfo));
        if (modified != null) {
            result.addField(ParserFieldEnum.modification_date, modified);
        }
    }

    result.addField(ParserFieldEnum.number_of_pages, pdf.getNumberOfPages());

    final PDDocumentCatalog docCatalog = pdf.getDocumentCatalog();
    if (docCatalog != null) {
        result.addField(ParserFieldEnum.language, docCatalog.getLanguage());
    }
}
From source file:com.jaeksoft.searchlib.web.controller.ViewerController.java
License:Open Source License
private void loadPdfBox() throws IOException, CryptographyException, SearchLibException, InterruptedException { PDDocument document = null; try {/*from w w w .j ava 2 s . com*/ document = PDDocument.loadNonSeq(tempFile, null); // Trying to open with empty password boolean isEncrypted = document.isEncrypted(); if (isEncrypted) document.decrypt(""); loadGS(isEncrypted ? "" : null); List<Rectangle> boxList = new ArrayList<Rectangle>(0); checkPdfBoxHighlight(document, boxList); checkHocrHighlight(currentImage.getWidth(), currentImage.getHeight(), boxList); ImageUtils.yellowHighlight(currentImage, boxList, 0.1F); numberOfPages = document.getNumberOfPages(); } finally { if (document != null) IOUtils.close(document); } }
From source file:com.joowon.returnA.classifier.export.PdfImageExport.java
License:Open Source License
/**
 * Renders each page of a PDF document to a PNG file at 300 DPI in RGB.
 *
 * <p>Page {@code k} (1-based) is written to
 * {@code filePath + "/" + fileName + "_" + k + ".png"} and a progress line is
 * printed for it. If an {@link IOException} occurs, it is reported on standard
 * error and the loop stops; array slots for pages not yet exported stay
 * {@code null}.
 *
 * @param document the document whose pages are exported
 * @param filePath directory part of the output path
 * @param fileName base name of the output files
 * @return one entry per page, in page order
 */
public static File[] export(PDDocument document, String filePath, String fileName) {
    final int pageCount = document.getNumberOfPages();
    final File[] exported = new File[pageCount];
    try {
        final PDFRenderer pdfRenderer = new PDFRenderer(document);
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
            final int index = pageNumber - 1;
            final BufferedImage rendered = pdfRenderer.renderImageWithDPI(index, 300, ImageType.RGB);
            final String target = filePath + "/" + fileName + "_" + pageNumber + ".png";
            ImageIOUtil.writeImage(rendered, target, 300);
            exported[index] = new File(target);
            System.out.println(
                    "Export image file from PDF : " + target + " [" + pageNumber + "/" + pageCount + "]");
        }
    } catch (IOException exception) {
        exception.printStackTrace();
        System.err.println("IOException occurred\nCheck file path.");
    }
    return exported;
}
From source file:com.joowon.returnA.classifier.extractor.PdfTextExtractor.java
License:Open Source License
public static void main(String[] args) throws IOException, PrinterException { // Target PDF Document PDDocument document = PDDocument .load(new File("/Users/Joowon/Documents/Github/ReturnA/data/tests/YAPNXRPm_eng1_mun.pdf")); List<String> pdfTextList = new ArrayList<>(); final int width = (int) document.getPage(0).getMediaBox().getWidth(); final int height = (int) document.getPage(0).getMediaBox().getHeight(); // Extract Test Information (first page's info area) pdfTextList.addAll(new PdfTextExtractor(document.getPage(0)).addRegion(0, 0, width, height / 4).extract()); for (int i = 0; i < document.getNumberOfPages(); ++i) { // Left side pdfTextList/*from w ww.ja v a2 s . c o m*/ .addAll(new PdfTextExtractor(document.getPage(i)).addRegion(0, 0, width / 2, height).extract()); // Right side pdfTextList.addAll( new PdfTextExtractor(document.getPage(i)).addRegion(width / 2, 0, width / 2, height).extract()); } System.out.println(pdfTextList.toString()); }
From source file:com.lanacion.adminsiteln.services.PdfIndexerService.PdfIndexerService.java
private int pdfgetPages(String fileName) { int numero_paginas = 0; PDFParser parser;// w ww .j ava 2 s . c om String parsedText = null; ; PDFTextStripper pdfStripper = null; //pdfStripper.setStartPage(0); //pdfStripper.setEndPage(0); PDDocument pdDoc = null; COSDocument cosDoc = null; File file = new File(fileName); if (!file.isFile()) { System.err.println("File " + fileName + " does not exist."); return 0; } try { parser = new PDFParser(new FileInputStream(file)); } catch (IOException e) { System.err.println("Unable to open PDF Parser. " + e.getMessage()); return 0; } try { parser.parse(); cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); numero_paginas = pdDoc.getNumberOfPages(); } catch (Exception e) { System.err.println("An exception occured in parsing the PDF Document." + e.getMessage()); } finally { try { if (cosDoc != null) { cosDoc.close(); } if (pdDoc != null) { pdDoc.close(); } } catch (Exception e) { e.printStackTrace(); } } return numero_paginas; }
From source file:com.openkm.util.metadata.MetadataExtractor.java
License:Open Source License
/**
 * Extract metadata from PDF.
 *
 * <p>Loads the document from the stream, copies the page count and the document
 * information entries into a {@link PdfMetadata}, logs it, and closes the document
 * before returning. The input stream itself is not closed by this method.
 *
 * @param is stream positioned at the start of the PDF content
 * @return the extracted metadata
 * @throws IOException if the document cannot be loaded, read or closed
 */
public static PdfMetadata pdfExtractor(InputStream is) throws IOException {
    PDDocument doc = PDDocument.load(is);
    // try/finally: the loaded document must be closed on every path, including
    // when reading one of the information entries throws.
    try {
        PDDocumentInformation info = doc.getDocumentInformation();
        PdfMetadata md = new PdfMetadata();
        md.setNumberOfPages(doc.getNumberOfPages());
        md.setTitle(info.getTitle());
        md.setAuthor(info.getAuthor());
        md.setSubject(info.getSubject());
        md.setKeywords(info.getKeywords());
        md.setCreator(info.getCreator());
        md.setProducer(info.getProducer());
        md.setTrapped(info.getTrapped());
        md.setCreationDate(info.getCreationDate());
        md.setModificationDate(info.getModificationDate());
        log.info("pdfExtractor: {}", md);
        return md;
    } finally {
        doc.close();
    }
}
From source file:com.opensearchserver.extractor.parser.PdfBox.java
License:Apache License
/**
 * Adds the PDF's document-level metadata to {@code metas}.
 *
 * <p>Title, subject, author, producer, keywords and both dates are added when the
 * document information is present. The page count is always added, and the
 * language is added when the document has a catalog.
 *
 * @param pdf the document to read the metadata from
 * @throws IOException declared by the original signature
 */
private void extractMetaData(PDDocument pdf) throws IOException {
    final PDDocumentInformation docInfo = pdf.getDocumentInformation();
    if (docInfo != null) {
        metas.add(TITLE, docInfo.getTitle());
        metas.add(SUBJECT, docInfo.getSubject());
        metas.add(AUTHOR, docInfo.getAuthor());
        metas.add(PRODUCER, docInfo.getProducer());
        metas.add(KEYWORDS, docInfo.getKeywords());
        metas.add(CREATION_DATE, getDate(getCreationDate(docInfo)));
        // NOTE(review): unlike the creation date, this value is not passed
        // through getDate - confirm that the difference is intended.
        metas.add(MODIFICATION_DATE, getModificationDate(docInfo));
    }

    metas.add(NUMBER_OF_PAGES, pdf.getNumberOfPages());

    final PDDocumentCatalog docCatalog = pdf.getDocumentCatalog();
    if (docCatalog != null) {
        metas.add(LANGUAGE, docCatalog.getLanguage());
    }
}