Example usage for com.itextpdf.text.pdf.parser PdfTextExtractor getTextFromPage

List of usage examples for com.itextpdf.text.pdf.parser PdfTextExtractor getTextFromPage

Introduction

On this page you can find example usages of com.itextpdf.text.pdf.parser.PdfTextExtractor.getTextFromPage.

Prototype

public static String getTextFromPage(PdfReader reader, int pageNumber) throws IOException 

Source Link

Document

Extract text from a specified page using the default strategy.

Usage

From source file:pdfgen.pdf_generation_try5.java

/**
 * Extracts the text of every page of {@code reader}, accumulates it into one
 * string, and then replaces each {@code *key*} placeholder in that string with
 * the value stored under {@code key} in {@code Xmlgeneric.tmap}. The final
 * text is assigned to {@code rep1}, which is declared outside this method.
 *
 * <p>If {@code tmap} is empty, {@code rep1} is left untouched.
 *
 * @param reader the PDF to extract text from
 * @throws IOException if text extraction fails for any page
 */
public void pdfReaderFunction(PdfReader reader) throws IOException {
    // NOTE(review): this instance is never read in this method; it is kept only
    // in case the Xmltohashmap constructor has side effects - confirm and remove.
    Xmltohashmap xh = new Xmltohashmap();

    int pageCount = reader.getNumberOfPages();

    // Deliberately starts as null: the first concatenation below therefore
    // produces the literal prefix "null" in front of the first page's text.
    String s1 = null;
    for (int j = 1; j <= pageCount; j++) {
        String pageText = PdfTextExtractor.getTextFromPage(reader, j);
        // The previous String.format(pageText, null) call was removed: its result
        // was unused, and page text containing a stray '%' made it throw.
        s1 = s1 + pageText;
        // Drops the first five characters of the accumulated text on EVERY page.
        // NOTE(review): on the first page this removes "null" plus one more
        // character; on later pages it removes five more characters of real text,
        // and it throws if fewer than five characters are available. Behaviour is
        // preserved as-is because the intended result cannot be confirmed here.
        s1 = s1.substring(5, s1.length());
    }

    Xmlgeneric xg = new Xmlgeneric();
    Iterator<Integer> keys = xg.tmap.keySet().iterator();
    boolean firstKey = true;
    while (keys.hasNext()) {
        int keyOfMap = keys.next();
        String key = "*" + keyOfMap + "*";
        String value = xg.tmap.get(keyOfMap);

        // The first substitution starts from the extracted text; every later
        // one continues from the result of the previous substitution.
        if (firstKey) {
            rep1 = s1.replace(key, value);
            firstKey = false;
        } else {
            rep1 = rep1.replace(key, value);
        }
    }
}

From source file:pdfreadersample.PdfReadersample.java

/**
 * Reads a fixed PDF file, treats every non-blank line of every page as an
 * integer, and prints how many integers were collected.
 *
 * @param args the command line arguments (not used)
 * @throws IOException if the PDF cannot be opened or a page cannot be read
 * @throws NumberFormatException if a non-blank line is not a valid integer
 */
public static void main(String[] args) throws IOException {
    ArrayList<Integer> lobjlist = new ArrayList<Integer>();
    PdfReader reader = new PdfReader("C:/Users/Varnith/Desktop/TestFilewithNumbers.pdf");
    try {
        int pageCount = reader.getNumberOfPages();
        System.out.println("This PDF has " + pageCount + " pages.");
        // iText page numbers are 1-based.
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
            String page = PdfTextExtractor.getTextFromPage(reader, pageNumber);
            String[] lstrwordlist = page.trim().split("\n");
            for (int j = 0; j < lstrwordlist.length; j++) {
                String line = lstrwordlist[j].trim();
                // An empty page (or a blank line between numbers) yields "",
                // which Integer.valueOf would reject; skip it instead.
                if (line.isEmpty()) {
                    continue;
                }
                lobjlist.add(Integer.valueOf(line));
            }
        }
    } finally {
        // Release the underlying file even when parsing fails.
        reader.close();
    }
    System.out.println("length of element :" + lobjlist.size());
}

From source file:pdftotextconverter.PDFToTextConverter.java

/**
 * Writes the extracted text of every page of a PDF to a text file, one page
 * after another, each followed by a line separator.
 *
 * <p>Any failure is reported with {@code printStackTrace()} and not rethrown.
 * The writer and the reader are always closed, even when extraction fails.
 *
 * @param src  path of the PDF to read
 * @param desc path of the text file to create or overwrite
 */
public static void convertPDFToText(String src, String desc) {
    BufferedWriter bw = null;
    PdfReader pr = null;
    try {
        // The output file is opened before the PDF, as in the original flow.
        bw = new BufferedWriter(new FileWriter(desc));
        pr = new PdfReader(src);
        int pNum = pr.getNumberOfPages();
        // iText page numbers are 1-based.
        for (int page = 1; page <= pNum; page++) {
            String text = PdfTextExtractor.getTextFromPage(pr, page);
            bw.write(text);
            bw.newLine();
        }
        bw.flush();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close the writer first and guard it, so a failing close cannot
        // prevent the reader from being released.
        if (bw != null) {
            try {
                bw.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (pr != null) {
            pr.close();
        }
    }
}

From source file:ProfilEcrivain.PdfReader.java

/**
 * Extracts and concatenates the text of every page of a PDF.
 *
 * <p>After each page the text accumulated so far is printed to standard
 * output. Any failure is reported with {@code printStackTrace()}; whatever
 * text was collected before the failure is still returned.
 *
 * @param pdf_url location of the PDF, passed to the {@code PdfReader} constructor
 * @return the concatenated page text, possibly empty
 */
private static String ReadPDF(String pdf_url) {
    StringBuilder str = new StringBuilder();
    PdfReader reader = null;
    try {
        reader = new PdfReader(pdf_url);
        int n = reader.getNumberOfPages();
        // Pages are numbered 1..n inclusive; the previous "i < n" bound
        // silently dropped the last page.
        for (int i = 1; i <= n; i++) {
            String str2 = PdfTextExtractor.getTextFromPage(reader, i);
            str.append(str2);
            System.out.println(str);
        }
    } catch (Exception err) {
        err.printStackTrace();
    } finally {
        // Release the reader whether or not extraction succeeded.
        if (reader != null) {
            reader.close();
        }
    }
    return str.toString();
}

From source file:sampletree.PdfReaderSample.java

/**
 * Reads a fixed PDF file and parses every non-blank line of every page as an
 * integer.
 *
 * @return the parsed integers, in page order and then line order
 * @throws IOException if the PDF cannot be opened or a page cannot be read
 * @throws NumberFormatException if a non-blank line is not a valid integer
 */
public ArrayList<Integer> ReadPdfDocument() throws IOException {
    ArrayList<Integer> lobjlist = new ArrayList<Integer>();
    PdfReader reader = new PdfReader("C:/Users/Varnith/Desktop/TestFilewithNumbers.pdf");
    try {
        int pageCount = reader.getNumberOfPages();
        System.out.println("This PDF has " + pageCount + " pages.");
        // iText page numbers are 1-based.
        for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
            String page = PdfTextExtractor.getTextFromPage(reader, pageNumber);
            String[] lstrwordlist = page.trim().split("\n");
            for (int j = 0; j < lstrwordlist.length; j++) {
                String line = lstrwordlist[j].trim();
                // An empty page (or a blank line between numbers) yields "",
                // which Integer.valueOf would reject; skip it instead.
                if (line.isEmpty()) {
                    continue;
                }
                lobjlist.add(Integer.valueOf(line));
            }
        }
    } finally {
        // Release the underlying file even when parsing fails.
        reader.close();
    }
    System.out.println("length of element :" + lobjlist.size());
    return lobjlist;
}

From source file:tan.jam.jsf.OrignalFileUploadBean.java

/**
 * Extracts the per-page text of every uploaded file and stores it in
 * {@code orignalFiles}.
 *
 * <p>A file whose first page yields at most one character of text is not
 * stored; "File is SCANNED" is printed for it instead. An {@code IOException}
 * while parsing a file adds a faces message, is logged at SEVERE level, and
 * does not stop the remaining files from being processed.
 */
private void parseFiles() {
    for (UploadedFile f : uploadedFiles) {
        PdfReader reader = null;
        try {
            reader = new PdfReader(f.getInputstream());
            String[] pages = new String[reader.getNumberOfPages()];
            for (int a = 0; a < pages.length; a++) {
                // Array index is 0-based, iText page numbers are 1-based.
                pages[a] = PdfTextExtractor.getTextFromPage(reader, a + 1);
            }
            if (pages[0].length() > 1) {
                orignalFiles.add(pages);
            } else {
                System.out.println("File is SCANNED");
            }
        } catch (IOException ex) {
            FacesMessage message = new FacesMessage("Error Parsing File ... ");
            FacesContext.getCurrentInstance().addMessage(null, message);
            Logger.getLogger(OrignalFileUploadBean.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // Each upload gets its own reader; release it before the next file.
            if (reader != null) {
                reader.close();
            }
        }
    }
}

From source file:tutorials.readpdf.readpdf.java

/**
 * Prints the page count, tamper flag, encryption flag, and the text of every
 * page of {@code test.pdf}.
 *
 * <p>An {@code IOException} is reported with {@code printStackTrace()}.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String[] args) {
    PdfReader reader = null;
    try {
        reader = new PdfReader("test.pdf");
        System.out.println("This PDF has " + reader.getNumberOfPages() + " pages.");
        System.out.println("Is this document tampered: " + reader.isTampered());
        System.out.println("Is this document encrypted: " + reader.isEncrypted());
        // iText page numbers are 1-based.
        for (int i = 1; i <= reader.getNumberOfPages(); i++) {
            String page = PdfTextExtractor.getTextFromPage(reader, i);
            System.out.println("Page Content:\n\n" + page + "\n\n");
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Previously the reader was closed only on the success path.
        if (reader != null) {
            reader.close();
        }
    }
}

From source file:uk.bl.dpt.qa.flint.wrappers.iTextWrapper.java

License:Apache License

/**
 * Check if a PDF file is valid or not. The file counts as valid when it can
 * be opened and text can be extracted from every page without any exception
 * being thrown.
 *
 * @param pFile file to check
 * @return whether the file is valid or not
 */
public boolean isValid(File pFile) {
    PdfReader pdf = null;
    boolean valid = false;

    try {
        pdf = new PdfReader(pFile.getAbsolutePath());
        LOGGER.debug("validating through {} pages of {}", pdf.getNumberOfPages(), pFile.getName());
        // page numbers start at 1
        int pageNo = 1;
        while (pageNo <= pdf.getNumberOfPages()) {
            // Only the side effect matters: extraction must not throw.
            PdfTextExtractor.getTextFromPage(pdf, pageNo);
            pageNo++;
        }
        valid = true;
    } catch (BadPasswordException e) {
        // Not logged; the file is simply reported as invalid.
        // NOTE(review): the original author questioned whether this is an error.
    } catch (InvalidPdfException e) {
        LOGGER.warn("InvalidPdfException leads to invalidity: {}", e);
    } catch (IOException e) {
        LOGGER.warn("IOException leads to invalidity: {}", e);
    } catch (Exception e) {
        LOGGER.warn("Exception leads to invalidity: {}", e);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }

    return valid;
}

From source file:uk.bl.wa.tika.parser.pdf.itext.PDFParser.java

License:Apache License

/**
 * Concatenates the extracted text of every page of the given reader.
 *
 * <p>If any exception occurs, its message is written to standard error and
 * the text gathered up to that point is returned.
 *
 * @param reader the opened PDF to extract text from
 * @return the text of all pages appended in page order, possibly empty
 */
private static String extractText(PdfReader reader) {
    final StringBuilder text = new StringBuilder();
    try {
        final int lastPage = reader.getNumberOfPages();
        // Page numbers run from 1 to lastPage inclusive.
        for (int pageNo = 1; pageNo <= lastPage; pageNo++) {
            String pageText = PdfTextExtractor.getTextFromPage(reader, pageNo);
            text.append(pageText);
        }
    } catch (Exception e) {
        System.err.println("PDFParser.extractText(): " + e.getMessage());
    }
    return text.toString();
}