List of usage examples for org.apache.pdfbox.pdmodel PDDocument close
@Override public void close() throws IOException
From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java
/**
 * Extracts the text of a PDF file page by page.
 *
 * <p>I/O failures are logged at SEVERE level instead of being propagated. In that case the
 * returned list contains the pages extracted before the failure (possibly none).
 *
 * @param pathToFile path of the PDF file to read
 * @return the extracted text, one entry per page in page order; never {@code null}
 */
@Override
public List<String> extractTextFromDocumentPagewise(String pathToFile) {
    List<String> pageWiseText = new LinkedList<>();
    // Tracked separately so the file handle is released even when the parser
    // constructor or parse() fails before any document object exists.
    FileInputStream input = null;
    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    try {
        input = new FileInputStream(new File(pathToFile));
        PDFParser parser = new PDFParser(input);
        parser.parse();
        cosDoc = parser.getDocument();
        pdDoc = new PDDocument(cosDoc);
        for (int pageId = 0; pageId < pdDoc.getNumberOfPages(); pageId++) {
            pageWiseText.add(extractTextFromPage(pdDoc, pageId));
        }
    } catch (IOException ex) {
        Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // Each resource is closed in its own try so one failing close does not skip the others.
        if (pdDoc != null) {
            try {
                pdDoc.close();
            } catch (IOException ex) {
                Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        if (cosDoc != null) {
            try {
                cosDoc.close();
            } catch (IOException ex) {
                Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        if (input != null) {
            try {
                input.close();
            } catch (IOException ex) {
                Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    return pageWiseText;
}
From source file:eu.transkribus.languageresources.extractor.pdf.PDFExtractor.java
@Override public String extractTextFromPage(String pathToFile, int page) { StringBuilder sb = new StringBuilder(); COSDocument cosDoc = null;//from ww w. j a va 2 s. co m PDDocument pdDoc = null; try { PDFParser parser = new PDFParser(new FileInputStream(new File(pathToFile))); parser.parse(); cosDoc = parser.getDocument(); pdDoc = new PDDocument(cosDoc); sb.append(extractTextFromPage(pdDoc, page)); } catch (IOException ex) { Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex); } finally { if (pdDoc != null) { try { pdDoc.close(); } catch (IOException ex) { Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex); } } if (cosDoc != null) { try { cosDoc.close(); } catch (IOException ex) { Logger.getLogger(PDFExtractor.class.getName()).log(Level.SEVERE, null, ex); } } } return sb.toString(); }
From source file:extractor.Extractor.java
/**
 * Loads each named PDF and wraps its extracted text in a {@code Document}.
 *
 * <p>Each file path is formed by concatenating {@code pathBase} and the file name directly; no
 * separator is inserted. Files that cannot be found are reported on standard output, other
 * I/O failures are logged at SEVERE level; in both cases the file is skipped and processing
 * continues with the next one.
 *
 * @param pathBase prefix prepended to every entry of {@code files}
 * @param files names of the PDF files to load
 * @return one {@code Document} per successfully processed file, in input order
 */
public static ArrayList<Document> returnDocuments(String pathBase, String[] files) {
    ArrayList<Document> documents = new ArrayList<>();
    for (String file : files) {
        try {
            String paperString;
            // try-with-resources releases the PDF even when text extraction fails.
            try (PDDocument pdDocument = PDDocument.load(new File(pathBase + file))) {
                paperString = new PDFTextStripper().getText(pdDocument);
            }
            // Reached only after a clean close, so a failed close still skips the file.
            documents.add(new Document(paperString));
        } catch (FileNotFoundException ex) {
            System.out.println("Arquivo no encontrado! Detalhes: " + ex.getLocalizedMessage());
        } catch (IOException ex) {
            Logger.getLogger(Classifierdoc.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return documents;
}
From source file:extractor.pdftotext.PdfToText.java
private String getPdfBoxRaw(File file) { try {// w w w . j a v a2 s . c o m PDDocument doc = PDDocument.load(file); PDFTextStripper stripper = new PDFTextStripper(); stripper.setPageStart("PAGE START"); stripper.setPageEnd("PAGE END"); //gets the text form the doc and replaces unknown signs with \n String rawText = stripper.getText(doc).replaceAll("[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cn}]", "\n"); doc.close(); return rawText; } catch (IOException ex) { Logger.getLogger(PdfToText.class.getName()).log(Level.SEVERE, null, ex); } return ""; }
From source file:FeatureExtraction.FeatureExtractorPDFStructuralPaths.java
/**
 * Returns true if the PDF is compatible, i.e. it can be loaded and its structural paths can
 * be extracted starting from the trailer without an {@link IOException}.
 *
 * @param filePath pdf file path
 * @return true if the PDF is compatible
 */
public boolean IsCompatiblePDF2(String filePath) {
    File pdfFile = new File(filePath);
    Map<String, Integer> structuralPaths = new HashMap<>();
    HashSet<COSBase> visitedObjects = new HashSet<>();
    boolean compatible = true;
    // Starts as null rather than a placeholder document: a placeholder would be
    // overwritten by load() and never closed.
    PDDocument pdf = null;
    String parentPath = "";
    String pdfObjectName = "Trailer";
    try {
        pdf = PDDocument.load(pdfFile);
        COSDocument pdfDocument = pdf.getDocument();
        ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), pdfObjectName,
                parentPath, structuralPaths, visitedObjects, 1);
    } catch (IOException e) {
        compatible = false;
    } finally {
        if (pdf != null) {
            try {
                // Closing the PDDocument also closes its underlying COSDocument.
                pdf.close();
            } catch (IOException e) {
                Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e);
            }
        }
    }
    return compatible;
}
From source file:FeatureExtraction.FeatureExtractorPDFStructuralPathsOld.java
/**
 * Returns true if the PDF is compatible, i.e. it can be loaded and its structural paths can
 * be extracted starting from the trailer without an {@link IOException}.
 *
 * @param filePath pdf file path
 * @return true if the PDF is compatible
 */
public boolean IsCompatiblePDF2(String filePath) {
    File pdfFile = new File(filePath);
    Map<String, Integer> structuralPaths = new HashMap<>();
    HashSet<COSBase> visitedObjects = new HashSet<>();
    boolean compatible = true;
    // Starts as null rather than a placeholder document: a placeholder would be
    // overwritten by load() and never closed.
    PDDocument pdf = null;
    try {
        pdf = PDDocument.load(pdfFile);
        COSDocument pdfDocument = pdf.getDocument();
        ExtractPDFStructuralPathsRecursively(pdfDocument.getTrailer().getCOSObject(), "Trailer", "",
                structuralPaths, visitedObjects);
    } catch (IOException e) {
        compatible = false;
    } finally {
        if (pdf != null) {
            try {
                // Closing the PDDocument also closes its underlying COSDocument.
                pdf.close();
            } catch (IOException e) {
                Console.PrintException(String.format("Error closing PDF file: '%s'", filePath), e);
            }
        }
    }
    return compatible;
}
From source file:fi.nls.oskari.printout.printing.pdfbox.UsingTextMatrix.java
License:Apache License
/** * creates a sample document with some text using a text matrix. * /*from w ww . j a v a2 s .c o m*/ * @param message * The message to write in the file. * @param outfile * The resulting PDF. * * @throws IOException * If there is an error writing the data. * @throws COSVisitorException * If there is an error writing the PDF. */ public void doIt(String message, String outfile) throws IOException, COSVisitorException { // the document PDDocument doc = null; try { doc = new PDDocument(); // Page 1 PDFont font = PDType1Font.HELVETICA; PDPage page = new PDPage(); page.setMediaBox(PDPage.PAGE_SIZE_A4); doc.addPage(page); float fontSize = 12.0f; PDRectangle pageSize = page.findMediaBox(); System.err.println("pageSize " + pageSize); System.err.println( "pageSize cm " + pageSize.getWidth() / 72 * 2.54 + "," + pageSize.getHeight() / 72 * 2.54); float centeredXPosition = (pageSize.getWidth() - fontSize / 1000f) / 2f; float stringWidth = font.getStringWidth(message); float centeredYPosition = (pageSize.getHeight() - (stringWidth * fontSize) / 1000f) / 3f; PDPageContentStream contentStream = new PDPageContentStream(doc, page, false, false); contentStream.setFont(font, fontSize); contentStream.beginText(); // counterclockwise rotation for (int i = 0; i < 8; i++) { contentStream.setTextRotation(i * Math.PI * 0.25, centeredXPosition, pageSize.getHeight() - centeredYPosition); contentStream.drawString(message + " " + i); } // clockwise rotation for (int i = 0; i < 8; i++) { contentStream.setTextRotation(-i * Math.PI * 0.25, centeredXPosition, centeredYPosition); contentStream.drawString(message + " " + i); } contentStream.endText(); contentStream.close(); // Page 2 page = new PDPage(); page.setMediaBox(PDPage.PAGE_SIZE_A4); doc.addPage(page); fontSize = 1.0f; contentStream = new PDPageContentStream(doc, page, false, false); contentStream.setFont(font, fontSize); contentStream.beginText(); // text scaling for (int i = 0; i < 10; i++) { contentStream.setTextScaling(12 + (i * 6), 
12 + (i * 6), 100, 100 + i * 50); contentStream.drawString(message + " " + i); } contentStream.endText(); contentStream.close(); // Page 3 page = new PDPage(); page.setMediaBox(PDPage.PAGE_SIZE_A4); doc.addPage(page); fontSize = 1.0f; contentStream = new PDPageContentStream(doc, page, false, false); contentStream.setFont(font, fontSize); contentStream.beginText(); int i = 0; // text scaling combined with rotation contentStream.setTextMatrix(12, 0, 0, 12, centeredXPosition, centeredYPosition * 1.5); contentStream.drawString(message + " " + i++); contentStream.setTextMatrix(0, 18, -18, 0, centeredXPosition, centeredYPosition * 1.5); contentStream.drawString(message + " " + i++); contentStream.setTextMatrix(-24, 0, 0, -24, centeredXPosition, centeredYPosition * 1.5); contentStream.drawString(message + " " + i++); contentStream.setTextMatrix(0, -30, 30, 0, centeredXPosition, centeredYPosition * 1.5); contentStream.drawString(message + " " + i++); contentStream.endText(); contentStream.close(); // Page 4 { page = new PDPage(); page.setMediaBox(PDPage.PAGE_SIZE_A4); doc.addPage(page); fontSize = 1.0f; contentStream = new PDPageContentStream(doc, page, false, false); contentStream.setFont(font, fontSize); contentStream.beginText(); AffineTransform root = new AffineTransform(); root.scale(72.0 / 2.54, 72.0 / 2.54); for (i = 0; i < pageSize.getHeight() / 72 * 2.54; i++) { // text scaling combined with rotation { AffineTransform rowMatrix = new AffineTransform(root); rowMatrix.translate(1, i); contentStream.setTextMatrix(rowMatrix); contentStream.drawString(message + " " + i); } } contentStream.endText(); contentStream.close(); } doc.save(outfile); } finally { if (doc != null) { doc.close(); } } }
From source file:FileIOAux.PrintAux.java
/** * @see/*w w w . j a v a 2 s. com*/ * http://stackoverflow.com/questions/23326562/apache-pdfbox-convert-pdf-to-images * @param fil * @return */ public static BufferedImage[] pdfToImage(String fil) { BufferedImage[] bim = null; try { PDDocument document = PDDocument.load(new File(fil)); if (document != null) { PDFRenderer pdfRenderer = new PDFRenderer(document); bim = new BufferedImage[document.getNumberOfPages()]; for (int i = 0; i < document.getNumberOfPages(); i++) { bim[i] = pdfRenderer.renderImage(i); } document.close(); } } catch (IOException ex) { Logger.getLogger(PrintAux.class.getName()).log(Level.SEVERE, null, ex); } return bim; }
From source file:firmaapp.MainFormController.java
@FXML void workOrderDateButtonAction(ActionEvent event) throws IOException { LocalDate date = workOrderDatePicker.getValue(); ProductMySQL prodMySQL = new ProductMySQL(); String numOfWO = ""; numOfWO = prodMySQL.createWorkOrder(Date.from(date.atStartOfDay(ZoneId.systemDefault()).toInstant())); WorkOrderPDF woPDF = new WorkOrderPDF(numOfWO, Date.from(date.atStartOfDay(ZoneId.systemDefault()).toInstant()), RootController.workOrderProducts, RootController.workOrderSupplies); woPDF.createWorkOrder();/* ww w . jav a2 s . c o m*/ DateFormat df = new SimpleDateFormat("yy"); // Just the year, with 2 digits String formatedDate = df.format(woPDF.getDateOfWorkOrder()); File file = new File("pdf_docs" + File.separator + "work_orders" + File.separator + "work_order_" + woPDF.getNumOfWorkOrder().split("/")[0] + "_" + formatedDate + ".pdf"); PDDocument doc = PDDocument.load(file); PDFRenderer renderer = new PDFRenderer(doc); renderer.renderImageWithDPI(0, 300); //BufferedImage image = renderer.renderImage(0); BufferedImage image = renderer.renderImageWithDPI(0, 300); ImageIO.write(image, "PNG", new File("pdf_docs" + File.separator + "work_orders_picture" + File.separator + "work_order_" + woPDF.getNumOfWorkOrder().split("/")[0] + "_" + formatedDate + ".png")); doc.close(); WorkOrderMySQL woMySQL = new WorkOrderMySQL(); ArrayList<WorkOrder> orders = woMySQL.getAllWokrOrders(); RootController.allWorkOrders = orders; Collections.sort(RootController.allWorkOrders, new WorkOrderCompare()); dataWorkOrders.clear(); for (WorkOrder w : RootController.allWorkOrders) { dataWorkOrders.add(w); } }
From source file:firmaapp.MainFormController.java
public void printPDFDocument(File file) throws Exception { PrinterJob job = PrinterJob.getPrinterJob(); PageFormat pf = job.defaultPage(); Paper paper = new Paper(); //paper.setSize(8.5 * 72, 11 * 72); double margin = 20; paper.setImageableArea(0, 0, paper.getWidth() - margin, paper.getHeight() - margin); pf.setPaper(paper);/*from w w w.ja va2 s.c om*/ pf.setOrientation(PageFormat.LANDSCAPE); // PDFBox PDDocument doc = PDDocument.load(file); Book book = new Book(); book.append(new PDFPrintable(doc), pf); //job.setPageable(new PDFPageable(doc)); job.setPageable(book); //Book b = new Book(); //job.setJobName("Job"); if (job.printDialog()) { job.print(); } doc.close(); }