List of usage examples for the constructors of org.apache.pdfbox.io.RandomAccessBufferedFileInputStream
public RandomAccessBufferedFileInputStream(InputStream input) throws IOException
From source file:com.sastix.cms.common.services.htmltopdf.PdfTest.java
License:Apache License
@Test public void testPdfFromStringTo() throws Exception { // GIVEN an html template containing special characters that java stores in utf-16 internally Pdf pdf = pdfBuilder.build();//w ww. jav a 2s .com pdf.addPage("<html><head><meta charset=\"utf-8\"></head><h1>Mller</h1></html>", PageType.htmlAsString); String tempFolder = temporaryFolder.newFolder().getPath(); pdf.saveAs(tempFolder + "/output.pdf"); // WHEN byte[] pdfBytes = pdf.getPDF(); PDFParser parser = new PDFParser( new RandomAccessBufferedFileInputStream(new ByteArrayInputStream(pdfBytes))); // that is a valid PDF (otherwise an IOException occurs) parser.parse(); PDFTextStripper pdfTextStripper = new PDFTextStripper(); String pdfText = pdfTextStripper.getText(new PDDocument(parser.getDocument())); assertThat("document should contain the creditorName", pdfText, containsString("Mller")); }
From source file:com.validation.manager.core.server.core.AttachmentServerTest.java
License:Apache License
/** * Test of addFile method, of class AttachmentServer. *//*from w ww . j a v a 2 s . com*/ @Test public void testAddRetrieveTextFile() { try { System.out.println("add text File"); File f = new File("target/Test.txt"); f.deleteOnExit(); List<String> lines = Arrays.asList("The first line", "The second line"); Path file = Paths.get(f.getAbsolutePath()); Files.write(file, lines, Charset.forName("UTF-8")); AttachmentServer instance = new AttachmentServer(); instance.addFile(f, f.getName()); instance.write2DB(); //Delete the file FileUtils.delete(f.getAbsolutePath()); assertEquals(1, (int) instance.getAttachmentType().getId());//Text file System.out.println("retrieveFile"); AttachmentServer temp = new AttachmentServer(instance.getAttachmentPK()); File loadedFile = temp.getAttachedFile("target/loaded/"); BufferedReader br = new BufferedReader(new FileReader(loadedFile)); String line; int count = 0; while ((line = br.readLine()) != null) { assertEquals(lines.get(count), line); System.out.println(line); count++; } assertEquals(lines.size(), count); //Create pdf file System.out.println("add pdf File"); File pdf = Tool.convertToPDF(loadedFile, "target/Text.pdf"); pdf.deleteOnExit(); instance = new AttachmentServer(); instance.addFile(pdf, pdf.getName()); instance.write2DB(); //Delete the file FileUtils.delete(pdf.getAbsolutePath()); assertEquals(2, (int) instance.getAttachmentType().getId());//PDF file System.out.println("retrieveFile"); temp = new AttachmentServer(instance.getAttachmentPK()); loadedFile = temp.getAttachedFile("target/loaded/"); PDFTextStripper pdfStripper; PDDocument pdDoc = null; COSDocument cosDoc = null; try { PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(loadedFile)); parser.parse(); cosDoc = parser.getDocument(); pdfStripper = new PDFTextStripper(); pdDoc = new PDDocument(cosDoc); pdfStripper.setStartPage(1); pdfStripper.setEndPage(1); String parsedText = pdfStripper.getText(pdDoc); System.out.println(parsedText); } catch 
(IOException ex) { Exceptions.printStackTrace(ex); fail(); } finally { if (cosDoc != null) { cosDoc.close(); } if (pdDoc != null) { pdDoc.close(); } } } catch (IOException | VMException ex) { Exceptions.printStackTrace(ex); fail(); } }
From source file:cz.mzk.editor.server.handler.GetOcrFromPdfHandler.java
License:Open Source License
/**
 * Extracts the text content of a PDF file.
 *
 * @param fileName path of the PDF file to read
 * @return the text extracted from the document
 * @throws ActionException if the path is not a regular file, or if the PDF
 *         cannot be opened or parsed
 */
private String pdftoText(String fileName) throws ActionException {
    File pdfFile = new File(fileName);
    if (!pdfFile.isFile()) {
        LOGGER.error("The file: " + fileName + " does not exist.");
        throw new ActionException("Unable to parse the pdf file.");
    }

    RandomAccessBufferedFileInputStream source = null;
    COSDocument cosDoc = null;
    PDDocument pdDoc = null;
    try {
        PDFParser parser;
        try {
            // Open the file directly rather than wrapping a FileInputStream, so no
            // temporary copy of the PDF has to be created.
            source = new RandomAccessBufferedFileInputStream(pdfFile);
            parser = new PDFParser(source);
        } catch (Exception e) {
            LOGGER.error("Unable to open PDF Parser.: " + e, e);
            throw new ActionException("Unable to parse the pdf file.");
        }

        try {
            parser.parse();
            cosDoc = parser.getDocument();
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            return pdfStripper.getText(pdDoc);
        } catch (Exception e) {
            LOGGER.error("An exception occured in parsing the PDF Document.", e);
            throw new ActionException("Unable to parse the pdf file. " + e);
        }
    } finally {
        // Each resource is closed independently so that one failing close()
        // cannot prevent the remaining ones from being released.
        closePdfResource(pdDoc);
        closePdfResource(cosDoc);
        closePdfResource(source);
    }
}

/** Closes the given resource if it is non-null, logging (not propagating) any failure. */
private void closePdfResource(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (Exception e) {
        LOGGER.error("Unable to close a PDF resource.", e);
    }
}
From source file:org.opensingular.lib.commons.pdf.TestPDFUtil.java
License:Apache License
/**
 * Counts the pages of the given PDF file.
 *
 * @param file the PDF file to inspect
 * @return the number of pages reported by the parsed document
 * @throws RuntimeException wrapping the {@link IOException} if the file cannot
 *         be read or parsed
 */
private int countPages(File file) {
    try (RandomAccessBufferedFileInputStream in = new RandomAccessBufferedFileInputStream(file)) {
        PDFParser parser = new PDFParser(in);
        parser.parse();
        // The document is a resource of its own try block so that it is closed
        // even when reading the page count fails.
        try (PDDocument doc = parser.getPDDocument()) {
            return doc.getNumberOfPages();
        }
    } catch (IOException e) {
        throw new RuntimeException("Unable to count the pages of " + file, e);
    }
}