List of usage examples for org.apache.pdfbox.pdmodel PDDocument getDocumentCatalog
public PDDocumentCatalog getDocumentCatalog()
From source file:Project.data.preparation.ImageExtraction.java
public void extractImages(String sourceDir, String destinationDir) throws IOException, CryptographyException, COSVisitorException { PDDocument document = null; double[] size; if (oldFile.exists()) { document = PDDocument.load(sourceDir); if (document.isEncrypted()) { document.decrypt(""); }/*from w w w . j a va 2 s. c o m*/ PrintImageLocation printer; // Get image location List<PDPage> list = document.getDocumentCatalog().getAllPages(); String fileName_img = oldFile.getName().replace(".pdf", "_cover"); int pageNum = 0; int totalImages = 1; System.out.println("\n" + filename); for (PDPage page : list) { original_imgName = new ArrayList<String>(); location_xy = new ArrayList<double[]>(); size_xy_ordered = new ArrayList<double[]>(); size_xy_tmp = new ArrayList<double[]>(); PDResources pdResources = page.getResources(); Map pageImages = pdResources.getXObjects(); pageNum++; if (pageImages != null && pageImages.size() > 0) { Iterator imageIter = pageImages.keySet().iterator(); while (imageIter.hasNext()) { String key = (String) imageIter.next(); PDXObjectImage pdxObjectImage = (PDXObjectImage) pageImages.get(key); String imgName = fileName_img + "_" + totalImages; System.out.println("Page Number : " + pageNum + "\t" + imgName); pdxObjectImage.write2file(destinationDir + imgName); original_imgName.add(imgName + "." + pdxObjectImage.getSuffix()); size = new double[] { pdxObjectImage.getWidth(), pdxObjectImage.getHeight() }; size_xy_ordered.add(size); totalImages++; } //Start for detect figure name for image renaming printer = new PrintImageLocation(page); location_xy = printer.getLocation_xy(); size_xy_tmp = printer.getSize_xy(); RearrangeImageOrder(location_xy, size_xy_tmp, size_xy_ordered); //PrinttoString(); DetectFigureName detectFig = new DetectFigureName(original_imgName, filename, pageNum, page, location_ordered, size_xy_ordered); } } } else { System.err.println("File not exists"); } if (document != null) { document.close(); } }
From source file:richtercloud.document.scanner.gui.MainPanel.java
License:Open Source License
/** * Uses a modal dialog in order to display the progress of the retrieval and * make the operation cancelable./*from w ww. ja va 2 s . co m*/ * @param documentFile * @return the retrieved images or {@code null} if the retrieval has been * canceled (in dialog) * @throws DocumentAddException * @throws InterruptedException * @throws ExecutionException */ /* internal implementation notes: - can't use ProgressMonitor without blocking EVT instead of a model dialog when using SwingWorker.get */ public List<BufferedImage> retrieveImages(final File documentFile) throws DocumentAddException, InterruptedException, ExecutionException { if (documentFile == null) { throw new IllegalArgumentException("documentFile mustn't be null"); } final SwingWorkerGetWaitDialog dialog = new SwingWorkerGetWaitDialog(SwingUtilities.getWindowAncestor(this), //owner DocumentScanner.generateApplicationWindowTitle("Wait", APP_NAME, APP_VERSION), //dialogTitle "Retrieving image data", //labelText null //progressBarText ); final SwingWorker<List<BufferedImage>, Void> worker = new SwingWorker<List<BufferedImage>, Void>() { @Override protected List<BufferedImage> doInBackground() throws Exception { List<BufferedImage> retValue = new LinkedList<>(); try { InputStream pdfInputStream = new FileInputStream(documentFile); PDDocument document = PDDocument.load(pdfInputStream); @SuppressWarnings("unchecked") List<PDPage> pages = document.getDocumentCatalog().getAllPages(); for (PDPage page : pages) { if (dialog.isCanceled()) { document.close(); MainPanel.LOGGER.debug("tab generation aborted"); return null; } BufferedImage image = page.convertToImage(); retValue.add(image); } document.close(); } catch (IOException ex) { throw new DocumentAddException(ex); } return retValue; } @Override protected void done() { } }; worker.addPropertyChangeListener(new SwingWorkerCompletionWaiter(dialog)); worker.execute(); //the dialog will be visible until the SwingWorker is done dialog.setVisible(true); List<BufferedImage> 
retValue = worker.get(); return retValue; }
From source file:se.streamsource.streamflow.web.application.pdf.Underlay.java
License:Apache License
private void overlayWithDarkenBlendMode(PDDocument document, PDDocument overlay) throws IOException { PDXObjectForm xobject = importAsXObject(document, (PDPage) overlay.getDocumentCatalog().getAllPages().get(0)); PDExtendedGraphicsState darken = new PDExtendedGraphicsState(); darken.getCOSDictionary().setName("BM", "Darken"); List<PDPage> pages = document.getDocumentCatalog().getAllPages(); for (PDPage page : pages) { Map<String, PDExtendedGraphicsState> states = page.getResources().getGraphicsStates(); if (states == null) states = new HashMap<>(); String darkenKey = MapUtil.getNextUniqueKey(states, "Dkn"); states.put(darkenKey, darken);// w w w . ja va2s.co m page.getResources().setGraphicsStates(states); PDPageContentStream stream = new PDPageContentStream(document, page, true, false, true); stream.appendRawCommands(String.format("/%s gs ", darkenKey)); stream.drawXObject(xobject, 0, 0, 1, 1); stream.close(); } }
From source file:tesreateimage.Tesreateimage.java
/**
 * Renders every page of a PDF as an RGB image at 300 dpi and writes each one
 * through {@code ImageIOUtil.writeImage} in "png" format, using
 * {@code pdfFilename + "-" + <1-based page number>} as the output name.
 *
 * @param pdfFilename path of the PDF to convert; also the prefix of the output names
 * @throws IOException if the PDF cannot be read or an image cannot be written
 */
public static void convertPDFToImage(String pdfFilename) throws IOException {
    PDDocument document = PDDocument.loadNonSeq(new File(pdfFilename), null);
    try {
        List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
        int page = 0;
        for (PDPage pdPage : pdPages) {
            ++page;
            BufferedImage bim = pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, 300);
            ImageIOUtil.writeImage(bim, "png", pdfFilename + "-" + page, 300);
        }
    } finally {
        // Release the document even when rendering or writing a page fails.
        document.close();
    }
}
From source file:Tools.PostProcessing.java
private String generateProcessedAndRejectPDFs(String preProcPdfFileName) throws IOException, COSVisitorException { PDDocument pdf = PDDocument.load(preProcPdfFileName); PDDocument rejectPdf = new PDDocument(); PDDocument cedmsPdf = new PDDocument(); //String rejectPdfFileName = preProcPdfFileName.replace(".pdf", "_forReject.pdf"); //String cedmsPdfFileName = preProcPdfFileName.replace(".pdf", "_forCEDMS.pdf"); int pageNum = pdf.getNumberOfPages(); //total number of pages in the pdf file // add reject page into rejectPdf PDFTextStripper pdfStripper = new PDFTextStripper(); int seqNumber = 1; boolean isLastReject = true; // last page status for (int i = 0; i < pageNum; i++) { PDPage page = (PDPage) pdf.getDocumentCatalog().getAllPages().get(i); int pageIndex = i + 1; pdfStripper.setStartPage(pageIndex); pdfStripper.setEndPage(pageIndex); String res = pdfStripper.getText(pdf); // System.out.println(res); if (res.contains(GlobalVar.PRE_PROC_KEY_SYMBOL)) { String[] data = GlobalVar.getCtrlNumAndfullSSN(res); String ctrlNum = data[0]; String fullSSN = data[1]; // System.out.println("full ssn:" + fullSSN + ". ctrl num:" + ctrlNum); // if(LEGIT_LV_MAP_FOR_COLOR_LV_LOG.containsKey(fullSSN)){ // // System.out.println("ctrl num: " + LEGIT_LV_MAP.get(fullSSN)); // } if (LEGIT_LV_MAP_FOR_COLOR_LV_LOG.containsKey(fullSSN) && LEGIT_LV_MAP_FOR_COLOR_LV_LOG.get(fullSSN).containsKey(ctrlNum)) { // System.out.println("Good leave"); int thisSeqNumber = Integer.parseInt(LEGIT_LV_MAP_FOR_COLOR_LV_LOG.get(fullSSN).get(ctrlNum)); if (thisSeqNumber == seqNumber) { cedmsPdf.addPage(page); //LEGIT_LV_MAP_FOR_COLOR_LV_LOG.get(fullSSN).remove(ctrlNum); // remove isLastReject = false; seqNumber++;/*w w w. j a va2 s .c o m*/ } else { String msg = "Order might be incorrect or there is a duplicate! ssn: " + fullSSN + " ctrl num: " + ctrlNum + ". 
Seq number is: " + seqNumber; //JOptionPane.showMessageDialog(null, msg); System.out.println(msg); } } else { rejectPdf.addPage(page); drawComments(rejectPdf, page, fullSSN, ctrlNum); isLastReject = true; } } else { // add the supporting documents to the last pdf file if (isLastReject) { rejectPdf.addPage(page); } else { cedmsPdf.addPage(page); } } } String cedmsPdfFileName = null; String rejectPdfFileName = null; if (preProcPdfFileName.contains(".pdf")) { cedmsPdfFileName = preProcPdfFileName.replace(".pdf", GlobalVar.PRE_CEDMS_PDF); rejectPdfFileName = preProcPdfFileName.replace(".pdf", "_forReject.pdf"); } else if (preProcPdfFileName.contains(".PDF")) { cedmsPdfFileName = preProcPdfFileName.replace(".PDF", GlobalVar.PRE_CEDMS_PDF); rejectPdfFileName = preProcPdfFileName.replace(".PDF", "_forReject.pdf"); } else { JOptionPane.showMessageDialog(null, "Invalid pre-processing file."); } // preProcPdfFileName.replace(".pdf", "_forReject.pdf"); // if (preProcPdfFileName.contains(".pdf")){ // cedmsPdfFileName = preProcPdfFileName.replace(".pdf", GlobalVar.PRE_CEDMS_PDF); // } else if (preProcPdfFileName.contains(".PDF")) { // cedmsPdfFileName = preProcPdfFileName.replace(".PDF", GlobalVar.PRE_CEDMS_PDF); // } else { // JOptionPane.showMessageDialog(null, "Invalid pre-processing file."); // } // if (rejectPdf.getNumberOfPages() > 0 && cedmsPdf.getNumberOfPages() > 0) { cedmsPdf.save(cedmsPdfFileName); rejectPdf.save(rejectPdfFileName); // JOptionPane.showMessageDialog(null, "The ready-for-upload and the rejected " // + "leave forms are saved in *_forCEDMS.pdf and *_forReject.pdf, respectively."); } else if (rejectPdf.getNumberOfPages() > 0) { rejectPdf.save(rejectPdfFileName); cedmsPdfFileName = null; // no cedms file is generated. 
// JOptionPane.showMessageDialog(null, "The rejected leave forms are saved in *_forReject.pdf."); } else if (cedmsPdf.getNumberOfPages() > 0) { cedmsPdf.save(cedmsPdfFileName); // JOptionPane.showMessageDialog(null, "The ready-for-upload leave forms are saved in *_forCEDMS.pdf."); } rejectPdf.close(); cedmsPdf.close(); pdf.close(); return cedmsPdfFileName; }
From source file:Tools.PostProcessing.java
private String pdfNumbering(String processedPdfFileName) throws IOException, COSVisitorException { PDDocument cedmsPdf = PDDocument.load(processedPdfFileName); List pages = cedmsPdf.getDocumentCatalog().getAllPages(); PDFTextStripper pdfStripper = new PDFTextStripper(); String res = pdfStripper.getText(cedmsPdf); //System.out.println(res); Boolean isPreProcessed = res.contains(GlobalVar.PRE_PROC_KEY_SYMBOL); // check if the file is pre-processed. Boolean isNumbered = res.contains("/0"); Iterator<PDPage> iter = pages.iterator(); int sequenceNum = 1; // start from 0001 if (isPreProcessed && isNumbered) { GlobalVar.updateSeqNum(cedmsPdf, CYCLE); // update the sequence number } else if (isPreProcessed) { // first time int pageNumber = 1; while (iter.hasNext()) { PDPage page = iter.next();/*from ww w . j a va2 s . c o m*/ pdfStripper.setStartPage(pageNumber); pdfStripper.setEndPage(pageNumber); res = pdfStripper.getText(cedmsPdf); // == numbering if (res.contains(GlobalVar.PRE_PROC_KEY_SYMBOL)) { String[] data = res.split(GlobalVar.PRE_PROC_KEY_SYMBOL); if (VERIFY_WITH_LISTING) { //verify the sequence number with the number on 80/80 listing String ssn = data[0].substring(data[0].length() - GlobalVar.SSN_LEN, data[0].length()); String ctrlNum = data[1].substring(0, -GlobalVar.MAX_CTRL_NUM_LEN); String seqNum = LEGIT_LV_MAP_FOR_COLOR_LV_LOG.get(ssn).get(ctrlNum); int seqNumberFrom8080Listing = Integer.parseInt(seqNum); if (seqNumberFrom8080Listing != sequenceNum) { JOptionPane.showMessageDialog(null, ssn + " " + ctrlNum + " seq num: " + seqNum + " do not match 80/80 listing"); } } PDPageContentStream stream = new PDPageContentStream(cedmsPdf, page, true, false); stream.beginText(); stream.setFont(PDType1Font.HELVETICA, GlobalVar.SEQ_NUM_FONT_SIZE); stream.moveTextPositionByAmount(GlobalVar.SEQ_NUM_TEXT_X_POSITION, GlobalVar.SEQ_NUM_TEXT_Y_POSITION); stream.setTextRotation(3.14 / 2, GlobalVar.SEQ_NUM_TEXT_X_POSITION, GlobalVar.SEQ_NUM_TEXT_Y_POSITION); // rotate text 90 
degree at x = 600, y = 400 //stream.drawString(CYCLE + "/" + seqNum); stream.drawString(CYCLE + "/" + GlobalVar.globalCountGenerator5Digit(sequenceNum)); sequenceNum++; stream.endText(); stream.close(); } pageNumber++; // end of numbering } } // String suffix = "_" + CYCLE +" Numbered.pdf"; // String fileName = DTL_PDF_FILE_NAME.replace(".pdf", suffix); String fileName = processedPdfFileName.replaceAll(".pdf", GlobalVar.NUMBERED_PDF); cedmsPdf.save(fileName); cedmsPdf.close(); return fileName; }
From source file:Tools.PreProcessing.java
private void generatePDFFile(String pdfFileName, String xlsxFileName, Boolean[][] statusArray) throws IOException, COSVisitorException { List<String> textList = readXlsxFile(xlsxFileName); // System.out.println("text list: " + textList); //Iterator<String> it = textList.iterator(); PDDocument pdf = PDDocument.load(pdfFileName); List pages = pdf.getDocumentCatalog().getAllPages(); Iterator<PDPage> iter = pages.iterator(); int pageNum = 0; // 0 based int index = 0; while (iter.hasNext()) { PDPage page = iter.next();//w w w. ja v a2s. com // PDPage pageBlank = new PDPage(); PDPageContentStream stream = new PDPageContentStream(pdf, page, true, false); if (statusArray[GlobalVar.SELECT_BUTTON_INDEX][pageNum]) { if (index < textList.size()) { String text = textList.get(index); // zero based //System.out.println(text); pageWrite(stream, text, index); } else { JOptionPane.showMessageDialog(null, "Preprocessing is inaccurate. XLSX list is shorter than the pdf file."); break; } index++; } stream.close(); pageNum++; } if (index > textList.size()) { JOptionPane.showMessageDialog(null, "Preproc might be inaccurate. XLSX list is longer than the pdf file."); } // out put two pdf files: one is template for printer print hardcopies, the other is digital copy String suffix = "_" + "_pre_processed.pdf"; pdfFileName = pdfFileName.replace(".pdf", suffix); pdf.save(pdfFileName); pdf.close(); }
From source file:transeditor.TransEditorFXMLController.java
private void importPDF(File file, Integer page) { PDDocument pdf; try {/*from w w w. j a va 2s.c om*/ pdf = PDDocument.load(file); List<PDPage> list = pdf.getDocumentCatalog().getAllPages(); //ToDO: Nicer selection window BufferedImage bfimage = list.get(page).convertToImage(1, 300); Image image = SwingFXUtils.toFXImage(bfimage, null); imageview.setImage(image); width = imageview.getBoundsInParent().getWidth(); getZoom(width); } catch (Exception e) { } }
From source file:uk.ac.ebi.biostudies.file.thumbnails.PDFThumbnail.java
License:Apache License
/**
 * Renders the first page of the PDF at {@code sourceFilePath} (RGB, 96 dpi), scales
 * it to {@code THUMBNAIL_WIDTH} while keeping the aspect ratio, and writes the result
 * to {@code thumbnailFile} as PNG.
 *
 * @param sourceFilePath path of the PDF to read
 * @param thumbnailFile  file the PNG thumbnail is written to
 * @throws IOException if the PDF cannot be read or rendered, or the image cannot be written
 */
@Override
public void generateThumbnail(String sourceFilePath, File thumbnailFile) throws IOException {
    // If loading fails there is nothing to close, so the load sits outside the try.
    final PDDocument document = PDDocument.load(sourceFilePath);
    try {
        final PDPage firstPage = (PDPage) document.getDocumentCatalog().getAllPages().get(0);
        final BufferedImage rendered = firstPage.convertToImage(BufferedImage.TYPE_INT_RGB, 96);

        // height / width of the rendered page, used to derive the thumbnail height
        final float heightPerWidth = ((float) rendered.getHeight()) / rendered.getWidth();
        final int thumbnailHeight = Math.round(heightPerWidth * THUMBNAIL_WIDTH);

        final BufferedImageOp scaler =
                new ResampleOp(THUMBNAIL_WIDTH, thumbnailHeight, ResampleOp.FILTER_LANCZOS);
        final BufferedImage thumbnail = scaler.filter(rendered, null);
        ImageIO.write(thumbnail, "png", thumbnailFile);
    } finally {
        document.close();
    }
}
From source file:uk.ac.ebi.tools.PDFExtractor.java
License:Open Source License
/** * Extracts images from a PDF file and returns them in a list. * * @param filePath//from w ww .java2s.c o m * @return * @throws java.io.IOException */ public HashMap<String, String> getImages(String filePath) throws IOException { HashMap<String, String> imagePaths = new HashMap<>(); try { if (new File(filePath).exists()) { PDDocument document = PDDocument.load(filePath); List<PDPage> list = document.getDocumentCatalog().getAllPages(); for (PDPage page : list) { PDResources pdResources = page.getResources(); Map pageImages = pdResources.getImages(); if (pageImages.size() > 0) { Iterator imageIter = pageImages.keySet().iterator(); while (imageIter.hasNext()) { String key = (String) imageIter.next(); PDXObjectImage pdxObjectImage = (PDXObjectImage) pageImages.get(key); String uniqueName = PDFDoc.generateUniqueName(); StringBuilder builder = new StringBuilder(); // set the imageFormat String imageFormat = pdxObjectImage.getSuffix(); builder = builder.append(System.getProperty("user.home")).append("/") .append(uniqueName); imagePaths.put(builder.toString(), imageFormat); pdxObjectImage.write2file(builder.toString()); builder.delete(0, builder.length()); } } } document.close(); } else { System.err.println("File not exists"); } } catch (IOException ex) { PDFlogger.log(Level.SEVERE, "Error while extracting: Please check the input.", ex.getMessage()); } return imagePaths; }