List of usage examples for the org.apache.pdfbox.pdmodel.PDDocument constructor PDDocument(COSDocument)
public PDDocument(COSDocument doc)
From source file:aplicacion.sistema.indexer.test.PDFTextStripperOrg.java
License:Apache License
/** * @deprecated/*from w w w . j a va 2s. com*/ * @see PDFTextStripper#getText( PDDocument ) * @param doc The document to extract the text from. * @return The document text. * @throws IOException If there is an error extracting the text. */ public String getText(COSDocument doc) throws IOException { return getText(new PDDocument(doc)); }
From source file:aplicacion.sistema.indexer.test.PDFTextStripperOrg.java
License:Apache License
/**
 * Writes the text of a low-level COS document to the given writer by
 * wrapping the document in a {@link PDDocument} and delegating to the
 * {@code PDDocument} overload.
 *
 * @deprecated Use {@link PDFTextStripper#writeText(PDDocument, Writer)} instead.
 * @see PDFTextStripper#writeText(PDDocument, Writer)
 * @param doc The document to extract the text from.
 * @param outputStream The stream to write the text to.
 * @throws IOException If there is an error extracting the text.
 */
@Deprecated
public void writeText(COSDocument doc, Writer outputStream) throws IOException {
    writeText(new PDDocument(doc), outputStream);
}
From source file:com.aaasec.sigserv.csspsupport.pdfbox.modifications.CsCOSWriter.java
License:Apache License
/**
 * Writes the given COS-level document by wrapping it in a
 * {@link PDDocument} and passing that wrapper to the {@code write}
 * overload that accepts it.
 *
 * @param doc The document to write.
 *
 * @throws COSVisitorException If an error occurs while generating the data.
 */
public void write(COSDocument doc) throws COSVisitorException {
    write(new PDDocument(doc));
}
From source file:com.amandine.NewEmptyJUnitTest.java
public String pdflookbook() throws IOException { String filePath = "C:\\Users\\janitha\\OneDrive\\Documents\\lookbookSS2016.pdf"; InputStream inputStream = null; String statementPDF = null;/*ww w .j a va 2s.com*/ try { inputStream = new FileInputStream(filePath); PDFParser parser = new PDFParser(inputStream); // This will parse the stream and populate the COSDocument object. parser.parse(); // Get the document that was parsed. COSDocument cosDoc = parser.getDocument(); // This class will take a pdf document and strip out all of the text and // ignore the formatting and such. PDFTextStripper pdfStripper = new PDFTextStripper(); // This is the in-memory representation of the PDF document PDDocument pdDoc = new PDDocument(cosDoc); pdfStripper.setStartPage(3); pdfStripper.setEndPage(pdDoc.getNumberOfPages() - 1); assertEquals(41, pdDoc.getNumberOfPages() - 1); // This will return the text of a document. statementPDF = pdfStripper.getText(pdDoc); // System.out.println(statementPDF); // String [] statementPDFArray = statementPDF.split("\\n"); // assertEquals(256, statementPDFArray.length); } catch (Exception e) { //Syste String errorMessage = "\nUnexpected Exception: " + e.getClass() + "\n" + e.getMessage(); for (StackTraceElement trace : e.getStackTrace()) { errorMessage += "\n\t" + trace; } System.out.println(errorMessage); } finally { if (inputStream != null) { inputStream.close(); } } return statementPDF; }
From source file:com.cisco.iwe.services.util.EmailMonitor.java
/**
 * Scans an uploaded expense receipt in PDF format and extracts the text
 * embedded in it.
 *
 * <p>Failures are reported on standard error rather than thrown. The input
 * stream and both document objects are each closed independently, so a
 * failure to close one does not prevent the others from being released.
 *
 * @param fileDir path of the PDF file to read
 * @return the extracted text; {@code null} if the path is not a regular
 *         file, the parser could not be opened, or extraction failed
 */
public String scanPDF(String fileDir) {
    File file = new File(fileDir);
    if (!file.isFile()) {
        System.err.println("File " + fileDir + " does not exist.");
        return null;
    }

    FileInputStream input = null;
    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    String parsedText = null;
    try {
        PDFParser parser;
        try {
            input = new FileInputStream(file);
            parser = new PDFParser(input);
        } catch (IOException e) {
            System.err.println("Unable to open PDF Parser. " + e.getMessage());
            return null;
        }

        try {
            parser.parse();
            cosDoc = parser.getDocument();
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(1);
            pdfStripper.setEndPage(pdDoc.getNumberOfPages());
            parsedText = pdfStripper.getText(pdDoc);
        } catch (Exception e) {
            System.err.println("An exception occured in parsing the PDF Document." + e.getMessage());
        }
    } finally {
        try {
            if (cosDoc != null) {
                cosDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            if (pdDoc != null) {
                pdDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            // Also covers the case where the parser constructor failed
            // after the stream had already been opened.
            if (input != null) {
                input.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return parsedText;
}
From source file:com.iqtb.validacion.util.LeerPDF.java
public String pdftoText(byte[] bytesPdf) { InputStream in = new ByteArrayInputStream(bytesPdf); // Se verifica si se puede abrir el InputStream try {//ww w. j ava2 s . c om parser = new PDFParser(in); } catch (IOException e) { logger.error("No se puede abrir. ERROR " + e); return null; } // En este proceso se abre, convierte y se cierra // el archivo PDF try { parser.parse(); cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); parsedText = pdfStripper.getText(pdDoc); cosDoc.close(); pdDoc.close(); } catch (IOException e) { logger.error("Ocurri un error. ERROR " + e); try { if (cosDoc != null) { cosDoc.close(); } if (pdDoc != null) { pdDoc.close(); } } catch (IOException e1) { logger.error("Ocurri un error. ERROR " + e1); } return null; } return parsedText; }
From source file:com.lanacion.adminsiteln.services.PdfIndexerService.PdfIndexerService.java
/** * Metodos privados para la indexacin/* w w w .j a v a2s .c om*/ */ private String pdftoText(String fileName, int pagina) { PDFParser parser; String parsedText = null; ; PDFTextStripper pdfStripper = null; //pdfStripper.setStartPage(0); //pdfStripper.setEndPage(0); PDDocument pdDoc = null; COSDocument cosDoc = null; File file = new File(fileName); if (!file.isFile()) { System.err.println("File " + fileName + " does not exist."); return null; } try { parser = new PDFParser(new FileInputStream(file)); } catch (IOException e) { System.err.println("Unable to open PDF Parser. " + e.getMessage()); return null; } try { parser.parse(); cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); pdfStripper.setStartPage(pagina); pdfStripper.setEndPage(pagina); parsedText = pdfStripper.getText(pdDoc); } catch (Exception e) { System.err.println("An exception occured in parsing the PDF Document." + e.getMessage()); } finally { try { if (cosDoc != null) { cosDoc.close(); } if (pdDoc != null) { pdDoc.close(); } } catch (Exception e) { e.printStackTrace(); } } return parsedText; }
From source file:com.lanacion.adminsiteln.services.PdfIndexerService.PdfIndexerService.java
private int pdfgetPages(String fileName) { int numero_paginas = 0; PDFParser parser;// w w w.j a va 2 s . c o m String parsedText = null; ; PDFTextStripper pdfStripper = null; //pdfStripper.setStartPage(0); //pdfStripper.setEndPage(0); PDDocument pdDoc = null; COSDocument cosDoc = null; File file = new File(fileName); if (!file.isFile()) { System.err.println("File " + fileName + " does not exist."); return 0; } try { parser = new PDFParser(new FileInputStream(file)); } catch (IOException e) { System.err.println("Unable to open PDF Parser. " + e.getMessage()); return 0; } try { parser.parse(); cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); numero_paginas = pdDoc.getNumberOfPages(); } catch (Exception e) { System.err.println("An exception occured in parsing the PDF Document." + e.getMessage()); } finally { try { if (cosDoc != null) { cosDoc.close(); } if (pdDoc != null) { pdDoc.close(); } } catch (Exception e) { e.printStackTrace(); } } return numero_paginas; }
From source file:com.pluszero.rostertogo.PdfManager.java
private void ToText(File file) throws IOException { this.pdfStripper = null; this.pdDoc = null; this.cosDoc = null; parser = new PDFParser(file); // for pfdBox 1.8, as 2.0 not yet supported in Android parser.parse();/*from w w w .j av a 2 s .c o m*/ cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); pdDoc.getNumberOfPages(); pdfStripper.setStartPage(1); pdfStripper.setEndPage(pdDoc.getNumberOfPages()); text = pdfStripper.getText(pdDoc); pdDoc.close(); }
From source file:com.pluszero.rostertogo.PdfManager.java
private void ToText(InputStream is) throws IOException { this.pdfStripper = null; this.pdDoc = null; this.cosDoc = null; parser = new PDFParser(is); // for PdfBox 1.8 as 2.0 not yet supported in Android parser.parse();/*from ww w . j av a 2 s . c om*/ cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); pdDoc.getNumberOfPages(); pdfStripper.setStartPage(1); pdfStripper.setEndPage(pdDoc.getNumberOfPages()); text = pdfStripper.getText(pdDoc); pdDoc.close(); }