Example usage for the com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy constructor LocationTextExtractionStrategy()

Introduction

On this page you can find example usages of the com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy constructor LocationTextExtractionStrategy().

Prototype

public LocationTextExtractionStrategy()

Source Link

Document

Creates a new text extraction renderer.

Usage

From source file:com.erikHolz.vertretungsplan.Converter.java

License:Open Source License

/**
 * Extracts the text of every page of {@code fileDest + ".pdf"} into
 * {@code fileDest + "__.txt"} (one {@code println} per page) and then deletes
 * the source PDF.
 *
 * <p>The reader and the writer are released even when extraction fails, and the
 * source PDF is only deleted after the text file has been written without error.
 *
 * @throws IOException if the PDF cannot be read or the text file cannot be written
 */
public void parsePDF() throws IOException {
    PdfReader reader = new PdfReader(fileDest + ".pdf");
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        try (PrintWriter out = new PrintWriter(new FileOutputStream(fileDest + "__.txt"))) {
            // iText page numbers are 1-based and inclusive of getNumberOfPages().
            final int pageCount = reader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                TextExtractionStrategy strategy =
                        parser.processContent(page, new LocationTextExtractionStrategy());
                out.println(strategy.getResultantText());
            }
            // PrintWriter never throws on write failures; checkError() flushes and
            // reports them, so we do not delete the PDF after a broken export.
            if (out.checkError()) {
                throw new IOException("Failed to write extracted text to " + fileDest + "__.txt");
            }
        }
    } finally {
        reader.close();
    }

    // Delete the original PDF now that its text has been exported.
    File f = new File(fileDest + ".pdf");
    if (f.exists()) {
        f.delete();
    }
}

From source file:javaapplication1.JavaApplication1.java

/**
 * Parses a specific area of a PDF to a plain text file.
 * @param pdf the original PDF/*ww w  .  ja v  a2  s  .com*/
 * @param txt the resulting text
 * @throws IOException
 */
public String mycheckline(PdfReader reader, int pheight, int pwidth, int lh, int page) throws IOException {
    // PrintWriter out = new PrintWriter(new FileOutputStream(txt));
    Rectangle rect = new Rectangle(0, pheight - lh, pwidth / 3, pheight - lh - fontchecksize);
    RenderFilter filter = new RegionTextRenderFilter(rect);
    TextExtractionStrategy strategy = new FilteredTextRenderListener(new LocationTextExtractionStrategy(),
            filter);
    return (PdfTextExtractor.getTextFromPage(reader, page, strategy));
}

From source file:org.sinarproject.ecparser.ECRedelineation.java

/**
 * Extracts the text of every page of {@code my_reader}, passes each page to
 * {@code describePage}, prints a parsing summary and optionally writes the
 * collected data out.
 *
 * @param mymeta output selector: {@code "json"} calls
 *               {@code Utils.writeJSONMappedData()}, {@code "csv"} calls
 *               {@code Utils.writeCSVFinalData()}; {@code null} or any other
 *               value writes nothing
 */
private static void processECStructure(String mymeta) {
    // TODO: Restructure all the stuff from main into here ..
    final int pageCount = my_reader.getNumberOfPages();
    // Maps init .. Key is <PAR_CODE>/<DUN_CODE>/<DM_CODE>
    // <Key> -> Name:Population
    // Errors -> Map<ErrKey, Original String>; ErrKey is Nxx
    final_mapped_data = new TreeMap<>();
    error_while_parsing = new TreeMap<>();
    // Loop through each page .. iText pages are numbered 1..pageCount inclusive,
    // so the bound must be <= or the last page is silently skipped.
    for (int page = 1; page <= pageCount; page++) {
        try {
            String content =
                    PdfTextExtractor.getTextFromPage(my_reader, page, new LocationTextExtractionStrategy());
            describePage(content, page);
        } catch (IOException ex) {
            // A failing page is logged and skipped so the remaining pages still get parsed.
            Logger.getLogger(ECRedelineation.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    // Dump out error hash .. if any
    if (error_while_parsing.size() > 0) {
        out.println("==============================");
        out.println(" PARSING ERRORS --> " + error_while_parsing.size() + " errors!!!");
        out.println("DUN: " + DUNerrors + " DM: " + DMerrors + " Fixed:" + fixedDMs);
        out.println("==============================");
        // TODO: Shift to file output
        /*
        for (Map.Entry<String, String> single_report_entry : error_while_parsing.entrySet()) {
        out.println("CODE: " + single_report_entry.getKey());
        out.println("UNMATCHED: " + single_report_entry.getValue());
        }
        */
    } else {
        out.println("========================");
        out.println("      ALL OK!!!         ");
        out.println("========================");
    }

    out.println("========================");
    out.println("  @@@@ Data!!! @@@@     ");
    out.println("========================");
    out.println("Final DM count: " + countedDM);
    // Detect if there any population not being able to be detected; so action can be taken
    for (Map.Entry<String, String> single_data_entry : final_mapped_data.entrySet()) {
        // Output those that were not able to be auto-corrected ..
        if (single_data_entry.getValue().endsWith(":0")) {
            out.print("KEY:" + single_data_entry.getKey());
            out.println(" ==> " + single_data_entry.getValue());
        }
    }
    // Write down output in the format requested by the caller, if any.
    if (null != mymeta) {
        switch (mymeta) {
        case "json":
            Utils.writeJSONMappedData();
            break;
        case "csv":
            // go direct to CSV ..
            Utils.writeCSVFinalData();
            break;
        default:
            // Unrecognised selector: nothing is written.
            break;
        }
    }

    out.println("xxxxxxxXXXXXXXXXXxxxxxxxxx");
}

From source file:org.sinarproject.ECRedelineation.java

/**
 * Entry point: opens the PDF at {@code SOURCE}, extracts the text of every
 * page, passes each page to {@code describePage}, prints a parsing summary
 * and writes the mapped data via {@code Utils.writeJSONMappedData()}.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String[] args) {
    out.println("Sinar Project's EC Parser ..");
    PdfReader reader = null;
    try {
        reader = new PdfReader(SOURCE);
        final int pageCount = reader.getNumberOfPages();
        // Maps init .. Key is <PAR_CODE>/<DUN_CODE>/<DM_CODE>
        // <Key> -> Name:Population
        // Errors -> Map<ErrKey, Original String>; ErrKey is Nxx
        final_mapped_data = new TreeMap<>();
        error_while_parsing = new TreeMap<>();
        // Loop through each page .. iText pages are numbered 1..pageCount inclusive,
        // so the bound must be <= or the last page is silently skipped.
        for (int page = 1; page <= pageCount; page++) {
            try {
                String content =
                        PdfTextExtractor.getTextFromPage(reader, page, new LocationTextExtractionStrategy());
                describePage(content, page);
            } catch (IOException ex) {
                // A failing page is logged and skipped so the remaining pages still get parsed.
                Logger.getLogger(ECRedelineation.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Dump out error hash .. if any
        if (error_while_parsing.size() > 0) {
            out.println("==============================");
            out.println(" PARSING ERRORS --> " + error_while_parsing.size() + " errors!!!");
            out.println("DUN: " + DUNerrors + " DM: " + DMerrors + " Fixed:" + fixedDMs);
            out.println("==============================");
            // TODO: Shift to file output
            /*
             for (Map.Entry<String, String> single_report_entry : error_while_parsing.entrySet()) {
             out.println("CODE: " + single_report_entry.getKey());
             out.println("UNMATCHED: " + single_report_entry.getValue());
             }
             */
        } else {
            out.println("========================");
            out.println("      ALL OK!!!         ");
            out.println("========================");
        }

        out.println("========================");
        out.println("  @@@@ Data!!! @@@@     ");
        out.println("========================");
        out.println("Final DM count: " + countedDM);
        // Detect if there any population not being able to be detected; so action can be taken
        for (Map.Entry<String, String> single_data_entry : final_mapped_data.entrySet()) {
            // Output those that were not able to be auto-corrected ..
            if (single_data_entry.getValue().endsWith(":0")) {
                out.print("KEY:" + single_data_entry.getKey());
                out.println(" ==> " + single_data_entry.getValue());
            }
        }
        // Write down output (JSON in this cut; the CSV writer is left disabled).
        Utils.writeJSONMappedData();
        // go direct to CSV ..
        // Utils.writeCSVFinalData();
        out.println("xxxxxxxXXXXXXXXXXxxxxxxxxx");
    } catch (IOException ex) {
        Logger.getLogger(ECRedelineation.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // Release the PDF even when parsing or output failed part-way.
        if (reader != null) {
            reader.close();
        }
    }
}

From source file:Project.data.preparation.ExtractPageContentArea.java

/**
 * Extracts the text inside a rectangular region of one page and stores it via
 * {@code setTextCropped}.
 *
 * <p>The four coordinates are passed to the {@code Rectangle} constructor in the
 * order {@code (upper_x, upper_y, lower_x, lower_y)}; the resulting rectangle is
 * also kept in the {@code rect} field.
 *
 * @param pdf     path of the PDF to open
 * @param pageNum the page number to extract from
 * @param upper_x first x coordinate of the region
 * @param upper_y first y coordinate of the region
 * @param lower_x second x coordinate of the region
 * @param lower_y second y coordinate of the region
 * @throws IOException if the PDF cannot be opened or the page cannot be read
 */
public void parsePdf(String pdf, int pageNum, int upper_x, int upper_y, int lower_x, int lower_y)
        throws IOException {
    PdfReader reader = new PdfReader(pdf);
    try {
        rect = new Rectangle(upper_x, upper_y, lower_x, lower_y);
        RenderFilter filter = new RegionTextRenderFilter(getRect());
        TextExtractionStrategy strategy =
                new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
        TextCropped = PdfTextExtractor.getTextFromPage(reader, pageNum, strategy);
        setTextCropped(TextCropped);
    } finally {
        // Close the reader even if extraction throws, so the file handle is not leaked.
        reader.close();
    }
}