List of usage examples for org.apache.pdfbox.pdmodel PDDocument close
@Override public void close() throws IOException
From source file:IO.search.SearchWordFile.java
private void search(File scrFile, String word) { //split the key word in different way //there are two way to split key word //space or hyphen //space 's meaning is any one key word contain in the file we search //hyphen 's meaning is all key word must contain in the file we seach String[] arrStr = null;/*from w w w .java2 s . c o m*/ String[] arrStrA = null; if (word.contains(" ")) { arrStr = word.split(" "); } else if (word.contains("-")) { arrStrA = word.split("-"); System.out.println("reach"); } //regular expression mean suffixes must contain doc. boolean is03word = scrFile.getName().matches("^.+\\.(?i)(doc)$"); if (is03word) { try { InputStream is = new FileInputStream(scrFile); WordExtractor ex = new WordExtractor(is); String text2003 = ex.getText(); if (arrStr != null && arrStr.length > 0) { //if keyword has space ,then we do spilt it //invoke the method finding(text2003, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { //if keyword has hyphen, it mean that the file we search must contain these key word. //we are using count varible to count the text of the file containing keyword whether enough or not. int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (text2003.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } //if count varible if equal with amount of keyword that the file is we want. 
if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (text2003.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } else if (scrFile.getName().matches("^.+\\.(?i)(docx)$")) { try { OPCPackage opcPackage = POIXMLDocument.openPackage(scrFile.getPath()); POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage); String text2007 = extractor.getText(); if (arrStr != null && arrStr.length > 0) { finding(text2007, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (text2007.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (text2007.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } else if (scrFile.getName().matches("^.+\\.(?i)(pdf)$")) { FileInputStream input = null; PDDocument pdfDocument = null; try { input = new FileInputStream(scrFile); PDFParser pdfParser = new PDFParser(input); pdfParser.parse(); pdfDocument = pdfParser.getPDDocument(); PDFTextStripper stripper = new PDFTextStripper(); String content = stripper.getText(pdfDocument); if (arrStr != null && arrStr.length > 0) { finding(content, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (content.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (content.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { 
Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } finally { try { input.close(); pdfDocument.close(); } catch (IOException ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } } else if (scrFile.getName().matches("^.+\\.(?i)(txt)$")) { BufferedReader in = null; StringBuffer sb = new StringBuffer(); try { in = new BufferedReader((new InputStreamReader(new FileInputStream(scrFile), getCharset(scrFile.getAbsolutePath())))); String line = null; while ((line = in.readLine()) != null) { sb.append(line); } if (arrStr != null && arrStr.length > 0) { if (finding(sb.toString(), arrStr, scrFile)) { return; } } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (sb.toString().contains(arrStrA[i])) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (line.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); return; } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } finally { try { in.close(); } catch (IOException ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } } else if (scrFile.getName().matches("^.+\\.(?i)(ppt)$")) {//find the key word in ppt file InputStream is = null; try { StringBuffer content = new StringBuffer(""); is = new FileInputStream(scrFile); //get core API HSLFSlideShow ss = new HSLFSlideShow(is); //get how many page in this PPT List<HSLFSlide> slides = ss.getSlides(); System.out.println("total have " + slides.size() + " page PPT"); for (int i = 0; i < slides.size(); i++) { //get each page of ppt content, retrun is List List<List<HSLFTextParagraph>> textParagraphs = slides.get(i).getTextParagraphs(); if (textParagraphs != null) { for (int j = 0; j < textParagraphs.size(); j++) { content.append("\n"); //get each row of the page List<HSLFTextParagraph> 
hslfTextParagraph = textParagraphs.get(j); for (int f = 0; f < hslfTextParagraph.size(); f++) { //get the text of this row content.append(hslfTextParagraph.get(f).toString()); } } } } if (arrStr != null && arrStr.length > 0) { finding(content.toString(), arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (content.toString().toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (content.toString().toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } finally { try { is.close(); } catch (IOException ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } } else if (scrFile.getName().matches("^.+\\.(?i)(pptx)$")) {//if powerpoint is 2007 or after we use this method String conetxt = null; try { conetxt = new XSLFPowerPointExtractor(POIXMLDocument.openPackage(scrFile.getPath())).getText(); if (arrStr != null && arrStr.length > 0) { finding(conetxt, arrStr, scrFile); } else if (arrStrA != null && arrStrA.length > 0) { int count = 0; for (int i = 0; i < arrStrA.length; i++) { if (conetxt.toLowerCase().contains(arrStrA[i].toLowerCase())) { count++; } } if (count == arrStrA.length) { nameList.add(scrFile.getPath()); } } else if (conetxt.toLowerCase().contains(word.toLowerCase())) { System.out.println("true"); nameList.add(scrFile.getPath()); } } catch (Exception ex) { Logger.getLogger(SearchWordFile.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:it.polito.tellmefirst.parsing.PDFparser.java
License:Open Source License
public String pdfToText(File file) throws TMFVisibleException { LOG.debug("[pdfToText] - BEGIN"); String result;/*from w ww . j a v a 2 s. c o m*/ if (!file.isFile()) { throw new TMFVisibleException("File in input is actually not a file."); } try { PDFParser parser = new PDFParser(new FileInputStream(file)); parser.parse(); COSDocument cosDoc = parser.getDocument(); PDFTextStripper pdfStripper = new PDFTextStripper(); PDDocument pdDoc = new PDDocument(cosDoc); //pdfStripper.setStartPage(1); //pdfStripper.setEndPage(5); // remove syllabification String parsedTextWithWrap = pdfStripper.getText(pdDoc); result = parsedTextWithWrap.replace("-\n", ""); if (cosDoc != null) cosDoc.close(); if (pdDoc != null) pdDoc.close(); } catch (Exception e) { LOG.error("[pdfToText] - EXCEPTION: ", e); throw new TMFVisibleException("Problem parsing file: the PDF document you uploaded seems malformed."); } LOG.debug("[pdfToText] - END"); return result; }
From source file:it.vige.albopretorio.ocr.OCRExtractAction.java
License:Apache License
/**
 * Loads a PDF from the given stream and returns its text with leading and trailing
 * whitespace removed.
 *
 * <p>The document is closed even when text extraction fails. The stream itself is not
 * closed by this method.
 *
 * @param in stream positioned at the start of a PDF document
 * @return the trimmed text content of the document
 * @throws IOException if the document cannot be loaded, read or closed
 */
public static String getText(InputStream in) throws IOException {
    PDDocument document = loadNonSeq(in, null);
    try {
        return new PDFTextStripper().getText(document).trim();
    } finally {
        document.close();
    }
}
From source file:javaapplication2.NewJFrame.java
private void jButton1MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jButton1MouseClicked /* JFileChooser chooser = new JFileChooser(); FileNameExtensionFilter filter = new FileNameExtensionFilter( "ficheiros PDF", "pdf");/*w ww. j av a 2 s. c om*/ chooser.setFileFilter(filter); int returnVal; returnVal = chooser.showOpenDialog(getParent()); if(returnVal == JFileChooser.APPROVE_OPTION) {*/ if (!jTextField1.getText().isEmpty()) { System.out.println("You chose to open this file: " + jTextField1.getText()); String text = null; String[] linhas = null; try { PDDocument doc = PDDocument.load(jTextField1.getText()); PDFTextStripper stripper = new PDFTextStripper(); text = stripper.getText(doc); doc.close(); } catch (IOException ex) { Logger.getLogger(NewJFrame.class.getName()).log(Level.SEVERE, null, ex); } linhas = text.split("\n", -1); ArrayList<Dia> dias = new ArrayList<>(); Refeicao r = null; Dia d = null; Boolean almoco = false; Boolean ignorarDia = false; //dias sem refeio ex. 
feriados for (String linha : linhas) { if (linha.contains("Segunda-feira") || linha.contains("Tera-feira") || linha.contains("Quarta-feira") || linha.contains("Quinta-feira") || linha.contains("Sexta-feira")) { if (d != null) {// && !ignorarDia){ dias.add(d); // ignorarDia=false; } d = new Dia(); almoco = true; r = new Refeicao(); jTextArea1.setText(jTextArea1.getText() + linha.trim() + " --NOVO DIA \n"); } /* else if(linha.contains("ENCERRADO")){ ignorarDia=true; }*/ else if (isValidDate(linha)) { DateFormat format = new SimpleDateFormat("dd-MM-yyyy", Locale.ENGLISH); try { d.setDia(format.parse(linha)); } catch (ParseException ex) { Logger.getLogger(NewJFrame.class.getName()).log(Level.SEVERE, null, ex); } jTextArea1.setText(jTextArea1.getText() + linha.trim() + " --DATA \n"); } else if (linha.contains("Sopa")) { if (almoco) jTextArea1.setText(jTextArea1.getText() + " ALMOO \n"); else jTextArea1.setText(jTextArea1.getText() + " JANTAR \n"); r.setSopa(linha); jTextArea1.setText(jTextArea1.getText() + linha.trim() + " --SOPA \n"); } else if (linha.contains("Carne")) { r.setCarne(linha); jTextArea1.setText(jTextArea1.getText() + linha.trim() + " --CARNE \n"); } else if (linha.contains("Peixe")) { r.setPeixe(linha); jTextArea1.setText(jTextArea1.getText() + linha.trim() + " --PEIXE \n"); } else if (linha.contains("Sobremesa")) { jTextArea1.setText(jTextArea1.getText() + linha.trim() + " --SOBREMESA \n"); r.setSobremesa(linha); if (almoco) { d.setAlmoco(r); r = new Refeicao(); almoco = false; } else { d.setJantar(r); r = new Refeicao(); almoco = true; } } else { jTextArea1.setText(jTextArea1.getText() + linha.trim() + " --Ignorado \n"); } // jTextArea1.setText(jTextArea1.getText()+t+" a testar\n "); } if (d != null) dias.add(d); jTextArea1.setText(" "); System.out.println("nmero de dias: " + dias.size()); getSelectedButtonText(); getConnection(); for (Dia dia : dias) { //if(dia.dia.compareTo(new Date(2015, 06, 12))==0){ if (dia.dia != null) { System.out.println("dia: 
" + dia.dia); jTextArea1.setText(jTextArea1.getText() + "\n" + dia.dia); jTextArea1.setText(jTextArea1.getText() + "\n" + "-----ALMOO-----\n"); if (dia.almoco != null) jTextArea1 .setText(jTextArea1.getText() + dia.almoco.getSopa() + "\n" + dia.almoco.getCarne() + "\n" + dia.almoco.getPeixe() + "\n" + dia.almoco.getSobremesa() + "\n"); else jTextArea1.setText(jTextArea1.getText() + "Nao definido" + "\n"); jTextArea1.setText(jTextArea1.getText() + "\n" + "-----JANTAR-----\n"); if (dia.jantar != null) jTextArea1 .setText(jTextArea1.getText() + dia.jantar.getSopa() + "\n" + dia.jantar.getCarne() + "\n" + dia.jantar.getPeixe() + "\n" + dia.jantar.getSobremesa() + "\n"); else jTextArea1.setText(jTextArea1.getText() + "Nao definido" + "\n"); try { String query = " insert into " + getSelectedButtonText() + " (data,temAlmoco,almoco_sopa,almoco_carne,almoco_peixe,almoco_sobremesa,temJantar,jantar_sopa,jantar_carne,jantar_peixe,jantar_sobremesa)" + " values (?,?,?,?,?,?,?,?,?,?,?)"; // create the mysql insert preparedstatement PreparedStatement preparedStmt = conexao.prepareStatement(query); java.sql.Date sqldate = new Date(dia.dia.getTime()); preparedStmt.setDate(1, sqldate); if (dia.almoco != null) { preparedStmt.setInt(2, 1); preparedStmt.setString(3, dia.almoco.getSopa()); preparedStmt.setString(4, dia.almoco.getCarne()); preparedStmt.setString(5, dia.almoco.getPeixe()); preparedStmt.setString(6, dia.almoco.getSobremesa()); } else { preparedStmt.setInt(2, 0); preparedStmt.setString(3, "No Definido"); preparedStmt.setString(4, "No Definido"); preparedStmt.setString(5, "No Definido"); preparedStmt.setString(6, "No Definido"); } if (dia.jantar != null) { preparedStmt.setInt(7, 1); preparedStmt.setString(8, dia.jantar.getSopa()); preparedStmt.setString(9, dia.jantar.getCarne()); preparedStmt.setString(10, dia.jantar.getPeixe()); preparedStmt.setString(11, dia.jantar.getSobremesa()); } else { preparedStmt.setInt(7, 0); preparedStmt.setString(8, "No Definido"); 
preparedStmt.setString(9, "No Definido"); preparedStmt.setString(10, "No Definido"); preparedStmt.setString(11, "No Definido"); } // execute the preparedstatement preparedStmt.execute(); //System.out.println(text); //jTextArea1.setText(text); } catch (SQLException ex) { Logger.getLogger(NewJFrame.class.getName()).log(Level.SEVERE, null, ex); } } } } }
From source file:javadocofflinesearch.htmlprocessing.PdfAttempter.java
public String pdftoText(InputStream is, boolean stats) throws IOException { PDDocument pdDoc = null; COSDocument cosDoc = null;//from www . j a v a 2 s . c o m try { PDFParser parser = new PDFParser(is); parser.parse(); cosDoc = parser.getDocument(); PDFTextStripper pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); String text = pdfStripper.getText(pdDoc); if (stats) { vc.addAll(text); } return text; } finally { if (cosDoc != null) { cosDoc.close(); } if (pdDoc != null) { pdDoc.close(); } } }
From source file:javaexample.RadialTextPdf.java
License:Open Source License
public void generateDocument() throws IOException, COSVisitorException { PDDocument document = new PDDocument(); try {//from w w w. ja va 2 s . c om // Sets some document metadata. PDDocumentInformation information = new PDDocumentInformation(); information.setTitle("Radial Text PDF example with Apache PDFBox"); information.setAuthor("Andrea Binello (\"andbin\")"); document.setDocumentInformation(information); // Generates and saves the document. generatePage(document); document.save(filename); } finally { try { document.close(); } catch (IOException e) { } } }
From source file:jeanderson.br.converte.Converte.java
/**
 * Renders every page of {@code <nome>.pdf} to a PNG image.
 *
 * <p>The PDF is read from the current user's {@code Google Drive/Notas Fiscais/PDF} folder
 * and the images are written to the sibling {@code PNG} folder as
 * {@code <nome><pageNumber>.png}, numbering pages from 1. Nothing happens when the PDF does
 * not exist. I/O failures are reported through {@code Relatar.bug} and logged. The document
 * is closed even when rendering or writing a page fails.
 *
 * @param nome base name of the PDF file, without extension
 */
public static void converter(String nome) {
    String baseDir = "/Users/" + System.getProperty("user.name") + "/Google Drive/Notas Fiscais/";
    File arquivo = new File(baseDir + "PDF/" + nome + ".pdf");
    if (!arquivo.exists()) {
        return;
    }
    try {
        PDDocument documento = PDDocument.load(arquivo);
        try {
            List<PDPage> lista = documento.getDocumentCatalog().getAllPages();
            int numeroDePagina = 1;
            for (PDPage pagina : lista) {
                BufferedImage imagem = pagina.convertToImage();
                File saida = new File(baseDir + "PNG/" + nome + numeroDePagina + ".png");
                ImageIO.write(imagem, "png", saida);
                numeroDePagina++;
            }
        } finally {
            // Previously skipped whenever a page failed to render or write.
            documento.close();
        }
    } catch (IOException ex) {
        Relatar.bug(Converte.class.getName(), ex.toString());
        Logger.getLogger(Converte.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:johnbrooksupgrade.PDFService.java
private void SaveActionPerformed(java.awt.event.ActionEvent evt) { Salesman = txtNewUser.getText().isEmpty() ? cmbSalesPerson.getSelectedItem().toString() : txtNewUser.getText();/* ww w .j av a 2s. c om*/ Date = DateInput.getText(); String ClientName = ClientnameInput.getText(); String ProjectName = ProjectNameInput.getText(); if (Branch == null) { JOptionPane.showMessageDialog(null, "Please Select a branch and try again.", "Error", JOptionPane.ERROR_MESSAGE); } else { /*** * This is the logic that prints the information to a PDF ***/ //We want to save the file to Windows' temporary files folder so it can be loaded from there straight away //This temporary file is deleted when the program is exited File myfile = new File("C:\\Windows\\Temp\\ConveyorFile.pdf"); //creates a new pdf if ((myfile) != null) { try { PDDocument doc; PDPage page; doc = new PDDocument(); // Create a new blank page and add it to the document page = new PDPage(); doc.addPage(page); PDFont font = PDType1Font.COURIER_BOLD; PDPageContentStream content = new PDPageContentStream(doc, page); content.beginText(); content.setFont(font, 30); // creates a new page and gives it formatting of text content.moveTextPositionByAmount(110, 600); content.drawString("Technical Specifications "); PDFont font2 = PDType1Font.COURIER; content.setFont(font, 14); content.moveTextPositionByAmount(-50, -65); content.drawString("Date:" + Date); content.moveTextPositionByAmount(0, -14); content.drawString("Sales person: " + Salesman); content.moveTextPositionByAmount(0, -14); content.drawString("Client Name: " + ClientName); content.moveTextPositionByAmount(0, -14); content.drawString("Project Name: " + ProjectName); content.moveTextPositionByAmount(0, -14); content.drawString("Branch: " + Branch); content.moveTextPositionByAmount(0, -32); // Specifications content.setFont(PDType1Font.COURIER_BOLD, 20); content.drawString("Specifications"); content.moveTextPositionByAmount(0, -10); content.setFont(font, 14); 
content.moveTextPositionByAmount(10, -14); content.drawString("Speed of Belt M/pm: " + mainDataEntry.Speedofbeltanswer29f); content.moveTextPositionByAmount(0, -14); content.drawString( "Drum/Sprocket Mtrs: " + String.format("%.2f", mainDataEntry.Drumdiameterinput1)); content.moveTextPositionByAmount(0, -14); content.drawString("Metres per minute: " + mainDataEntry.getMetresperminuteanswer4f()); content.moveTextPositionByAmount(0, -14); content.drawString("Metres per hour: " + mainDataEntry.getMetresperhouranswer5f()); content.moveTextPositionByAmount(0, -14); content.drawString("Product per hour: " + mainDataEntry.getLengthperhouranswer7f()); content.moveTextPositionByAmount(0, -14); content.drawString("Kg's per hour: " + mainDataEntry.getKgsperhouranswer9f()); content.moveTextPositionByAmount(0, -14); content.drawString("Kg at any given time: " + mainDataEntry.getKgatanygiventimeanswer10f()); content.moveTextPositionByAmount(-10, -28); // Results content.setFont(PDType1Font.COURIER_BOLD, 20); content.drawString("Results"); content.moveTextPositionByAmount(0, -10); content.setFont(font, 14); content.moveTextPositionByAmount(10, -14); content.drawString("RPM: " + String.format("%.2f", mainDataEntry.getRpmconveyor34())); content.moveTextPositionByAmount(0, -14); content.drawString("Application Factor: " + mainDataEntry.getRadiananswer25f()); content.moveTextPositionByAmount(0, -14); content.drawString("Pull Force Kg/f: " + mainDataEntry.PullForce); content.moveTextPositionByAmount(0, -14); content.drawString("Nm Torque: " + mainDataEntry.getNmanswer15f()); content.moveTextPositionByAmount(0, -14); content.drawString("Service Factor: " + mainDataEntry.getServicefactor17f()); content.moveTextPositionByAmount(0, -14); content.drawString("Design Kw: " + mainDataEntry.getDesignkwanswer18f()); content.moveTextPositionByAmount(-10, -28); // Gearbox recommendations // only bother with section if the gearbox details aren't empty if 
(!mainDataEntry.GearboxDetailsForPDF.isEmpty()) { content.setFont(PDType1Font.COURIER_BOLD, 20); content.drawString("Gearbox/Motor Recommendations"); content.moveTextPositionByAmount(2, -25); content.setFont(PDType1Font.COURIER_BOLD, 18); content.drawString(mainDataEntry.GearboxType + ":"); content.setFont(font, 14); content.moveTextPositionByAmount(10, -25); String[] display; // For the brooks cyclo we need to split the string by these values // then write each index of the resulting array separately so the // result doesn't just run off the page display = mainDataEntry.GearboxDetailsForPDF.get(0).split("Ratio: |Overload "); // Only need to do this when the string has been split out by Ratio or Overload // i.e. this is only the case for brooks cyclo, the other two types fit the page fine if (display.length > 1) { display[1] = "Ratio: " + display[1]; content.drawString(display[0]); content.moveTextPositionByAmount(0, -14); content.drawString(display[1]); if (display.length > 2) { display[2] = "Overload " + display[2]; content.moveTextPositionByAmount(0, -14); content.drawString(display[2]); } } else { // first option must exist for the program to get this far content.drawString(mainDataEntry.GearboxDetailsForPDF.get(0)); } content.moveTextPositionByAmount(0, -14); // only bother with the second option if it exists if (mainDataEntry.GearboxDetailsForPDF.size() > 1) { display = mainDataEntry.GearboxDetailsForPDF.get(1).split("Ratio: |Overload "); if (display.length > 1) { content.moveTextPositionByAmount(0, -15); display[1] = "Ratio: " + display[1]; content.drawString(display[0]); content.moveTextPositionByAmount(0, -14); content.drawString(display[1]); if (display.length > 2) { display[2] = "Overload " + display[2]; content.moveTextPositionByAmount(0, -14); content.drawString(display[2]); } } else { content.moveTextPositionByAmount(0, -14); content.drawString(mainDataEntry.GearboxDetailsForPDF.get(1)); } } } content.endText(); image2 = ImageIO.read(new 
File("logosmall.jpg")); BufferedImage logo = image2; // writes the image to the file PDXObjectImage jblogo = new PDPixelMap(doc, logo); content.drawImage(jblogo, 20, 650); //postions image content.close(); doc.getDocument(); doc.save(myfile.getAbsolutePath()); // open the file Desktop.getDesktop().open(myfile); doc.close(); close(); //saves pdf and closes it } catch (IOException | COSVisitorException ie) { JOptionPane.showMessageDialog(null, ie.getMessage(), "Error!", JOptionPane.INFORMATION_MESSAGE); } } } }
From source file:khoji.PDFdoc.java
License:Apache License
/** * * @param doc_path//from w w w .ja v a 2s . c o m * @return * @throws Exception */ public String extractPDF(String doc_path) throws Exception { PDDocument document = null; try { document = PDDocument.load(doc_path); if (document.isEncrypted()) { try { document.decrypt(""); } catch (InvalidPasswordException e) { System.err.println("Error: Document is encrypted with a password."); System.exit(1); } } PDFTextStripper stripper = new PDFTextStripper(); String text = ""; text = stripper.getText(document); // System.out.println("text:"+text); return text; } finally { if (document != null) { document.close(); } } }
From source file:main.java.ufg.inf.manutencao.util.UtilTexto.java
/**
 * Reads the text content of a PDF file.
 *
 * <p>The document is closed before the method returns. An {@code IOException} raised while
 * loading, reading or closing the document is rethrown wrapped in a
 * {@code RuntimeException}.
 *
 * @param caminho path of the PDF file
 * @return the text extracted from the document
 */
public static String extraiTextoDoPDF(String caminho) {
    PDDocument documento = null;
    try {
        documento = PDDocument.load(caminho);
        return new PDFTextStripper().getText(documento);
    } catch (IOException falha) {
        throw new RuntimeException(falha);
    } finally {
        if (documento != null) {
            try {
                documento.close();
            } catch (IOException falha) {
                throw new RuntimeException(falha);
            }
        }
    }
}