List of usage examples for com.itextpdf.text.pdf.parser.PdfTextExtractor.getTextFromPage
public static String getTextFromPage(PdfReader reader, int pageNumber) throws IOException
From source file:pdfgen.pdf_generation_try5.java
public void pdfReaderFunction(PdfReader reader) throws IOException { String s;//from w ww . java 2 s . c o m String u; String k; Xmltohashmap xh = new Xmltohashmap(); //System.out.println(xh.hmap); int i = reader.getNumberOfPages(); String s1 = null; for (int j = 1; j <= i; j++) { s = PdfTextExtractor.getTextFromPage(reader, j); k = s.format(s, null); // System.out.println(k); s1 = s1 + s; int st1 = s1.length(); // String nullChar=s1.substring(0,4); s1 = s1.substring(5, st1); // System.out.println("This is null character"+nullChar); } Xmlgeneric xg = new Xmlgeneric(); Iterator<Integer> iter = xg.tmap.keySet().iterator(); for (int f = 0; f < xg.tmap.size(); f++) { /*---------------Jaydatta--------------- * String key="*"+(String)xh.hmap.keySet().toArray()[f]+"*"; String value=(String)xh.hmap.values().toArray()[f]; */ //-----------------------Prasanna---------------- String key = ""; String value = ""; int keyOfMap = iter.next(); key = "*" + keyOfMap + "*"; value = xg.tmap.get(keyOfMap); // System.out.println("key :=> "+key+": "+xg.tmap.get(keyOfMap)); if (f == 0) { rep1 = s1.replace(key, value); } else { rep1 = rep1.replace(key, value); } } }
From source file:pdfreadersample.PdfReadersample.java
/** * @param args the command line arguments *//*from www. j a v a 2 s . co m*/ public static void main(String[] args) throws IOException { // TODO code application logic here String[] lstrwordlist = {}; ArrayList<Integer> lobjlist = new ArrayList<Integer>(); PdfReader reader = new PdfReader("C:/Users/Varnith/Desktop/TestFilewithNumbers.pdf"); System.out.println("This PDF has " + reader.getNumberOfPages() + " pages."); for (int i = 0; i < reader.getNumberOfPages(); i++) { String page = PdfTextExtractor.getTextFromPage(reader, i + 1); lstrwordlist = page.trim().split("\n"); //System.out.println("hello"); for (int j = 0; j < lstrwordlist.length; j++) { lobjlist.add(Integer.valueOf(lstrwordlist[j].trim())); } //lobjlist.add(lstrwordlist[0]); //System.out.println("Page Content:\n\n"+page+"\n\n"); } System.out.println("length of element :" + lobjlist.size()); //System.out.println("Is this document encrypted: "+reader.isEncrypted()); }
From source file:pdftotextconverter.PDFToTextConverter.java
public static void convertPDFToText(String src, String desc) { try {// w w w . ja va 2 s. c o m FileWriter fw = new FileWriter(desc); BufferedWriter bw = new BufferedWriter(fw); PdfReader pr = new PdfReader(src); int pNum = pr.getNumberOfPages(); for (int page = 1; page <= pNum; page++) { String text = PdfTextExtractor.getTextFromPage(pr, page); bw.write(text); bw.newLine(); } bw.flush(); bw.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:ProfilEcrivain.PdfReader.java
/**
 * Returns the concatenated text of all pages of the PDF at {@code pdf_url}.
 *
 * <p>Page numbers are 1-based and inclusive, so the loop runs through
 * {@code n}; the previous {@code i < n} bound skipped the last page (and
 * returned nothing at all for single-page documents). The reader is now closed
 * on every path.
 *
 * <p>On failure the stack trace is printed and whatever text was gathered so
 * far is returned.
 *
 * @param pdf_url location of the PDF, passed to the {@code PdfReader} constructor
 * @return the extracted text; empty if nothing could be read
 */
private static String ReadPDF(String pdf_url) {
    StringBuilder str = new StringBuilder();
    PdfReader reader = null;
    try {
        reader = new PdfReader(pdf_url);
        int n = reader.getNumberOfPages();
        for (int i = 1; i <= n; i++) {
            str.append(PdfTextExtractor.getTextFromPage(reader, i));
            // Progress output kept from the original: prints the text accumulated so far.
            System.out.println(str);
        }
    } catch (Exception err) {
        err.printStackTrace();
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
    return str.toString();
}
From source file:sampletree.PdfReaderSample.java
public ArrayList<Integer> ReadPdfDocument() throws IOException { String[] lstrwordlist = {};// w w w.ja v a2 s . c o m ArrayList<Integer> lobjlist = new ArrayList<Integer>(); PdfReader reader = new PdfReader("C:/Users/Varnith/Desktop/TestFilewithNumbers.pdf"); System.out.println("This PDF has " + reader.getNumberOfPages() + " pages."); for (int i = 0; i < reader.getNumberOfPages(); i++) { String page = PdfTextExtractor.getTextFromPage(reader, i + 1); lstrwordlist = page.trim().split("\n"); //System.out.println("hello"); for (int j = 0; j < lstrwordlist.length; j++) { lobjlist.add(Integer.valueOf(lstrwordlist[j].trim())); } //lobjlist.add(lstrwordlist[0]); //System.out.println("Page Content:\n\n"+page+"\n\n"); } System.out.println("length of element :" + lobjlist.size()); return lobjlist; }
From source file:tan.jam.jsf.OrignalFileUploadBean.java
/**
 * Extracts the per-page text of every uploaded file and adds it to
 * {@code orignalFiles} when the first page yields more than one character of
 * text; otherwise the file is reported as scanned.
 *
 * <p>The reader is closed for every file, including when extraction fails, and
 * a document reporting zero pages is treated like a scanned file instead of
 * triggering an {@code ArrayIndexOutOfBoundsException}.
 *
 * <p>An {@code IOException} for one file is surfaced as a Faces message and
 * logged; processing continues with the next file.
 */
private void parseFiles() {
    for (UploadedFile f : uploadedFiles) {
        PdfReader reader = null;
        try {
            reader = new PdfReader(f.getInputstream());
            String[] pages = new String[reader.getNumberOfPages()];
            for (int a = 0; a < pages.length; a++) {
                // Array is 0-based, PDF page numbers are 1-based.
                pages[a] = PdfTextExtractor.getTextFromPage(reader, a + 1);
            }
            if (pages.length > 0 && pages[0].length() > 1) {
                orignalFiles.add(pages);
            } else {
                System.out.println("File is SCANNED");
            }
        } catch (IOException ex) {
            FacesMessage message = new FacesMessage("Error Parsing File ... ");
            FacesContext.getCurrentInstance().addMessage(null, message);
            Logger.getLogger(OrignalFileUploadBean.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
}
From source file:tutorials.readpdf.readpdf.java
public static void main(String[] args) { try {//from w ww . j a v a 2s . c om PdfReader reader = new PdfReader("test.pdf"); System.out.println("This PDF has " + reader.getNumberOfPages() + " pages."); System.out.println("Is this document tampered: " + reader.isTampered()); System.out.println("Is this document encrypted: " + reader.isEncrypted()); for (int i = 1; i <= reader.getNumberOfPages(); i++) { String page = PdfTextExtractor.getTextFromPage(reader, i); System.out.println("Page Content:\n\n" + page + "\n\n"); } reader.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:uk.bl.dpt.qa.flint.wrappers.iTextWrapper.java
License:Apache License
/** * Check if a PDF file is valid or not//from ww w. j a v a 2s . c o m * @param pFile file to check * @return whether the file is valid or not */ public boolean isValid(File pFile) { boolean ret = false; PdfReader reader = null; try { reader = new PdfReader(pFile.getAbsolutePath()); LOGGER.debug("validating through {} pages of {}", reader.getNumberOfPages(), pFile.getName()); for (int i = 0; i < reader.getNumberOfPages(); i++) { //page numbers start at 1 PdfTextExtractor.getTextFromPage(reader, (i + 1)); } ret = true; } catch (BadPasswordException e) { //actually an error??? } catch (InvalidPdfException e) { LOGGER.warn("InvalidPdfException leads to invalidity: {}", e); } catch (IOException e) { LOGGER.warn("IOException leads to invalidity: {}", e); } catch (Exception e) { LOGGER.warn("Exception leads to invalidity: {}", e); } finally { if (reader != null) reader.close(); } return ret; }
From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java
License:Apache License
/**
 * Concatenates the extracted text of every page of {@code reader}.
 *
 * <p>If extraction throws, the exception message is written to
 * {@code System.err} and the text collected up to that point is returned.
 * The reader is not closed here.
 *
 * @param reader the PDF to extract text from
 * @return the text of all pages read successfully, in page order
 */
private static String extractText(PdfReader reader) {
    final StringBuilder text = new StringBuilder();
    try {
        final int pageCount = reader.getNumberOfPages();
        // Page numbers are 1-based and inclusive.
        for (int pageNo = 1; pageNo <= pageCount; pageNo++) {
            String pageText = PdfTextExtractor.getTextFromPage(reader, pageNo);
            text.append(pageText);
        }
    } catch (Exception e) {
        System.err.println("PDFParser.extractText(): " + e.getMessage());
    }
    return text.toString();
}