Example usage for org.apache.pdfbox.text PDFTextStripperByArea getTextForRegion

List of usage examples for org.apache.pdfbox.text PDFTextStripperByArea getTextForRegion

Introduction

In this page you can find the example usage for org.apache.pdfbox.text PDFTextStripperByArea getTextForRegion.

Prototype

public String getTextForRegion(String regionName) 

Source Link

Document

Get the text for the region, this should be called after extractRegions().

Usage

From source file:com.amolik.misc.ExtractTextByArea.java

License:Apache License

/**
 * This will print the documents text in a certain area.
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 *///  ww w . ja  va 2  s. com
public static void main(String[] args) throws IOException {
    //args[0]= "E:\\Automation\\uphillit\\Fiscal_demo_data.pdf";
    //        if( args.length != 1 )
    //        {
    //            usage();
    //        }
    //        else
    //        {
    PDDocument document = null;
    try {
        document = PDDocument.load(new File("E:\\Automation\\uphillit\\Fiscal_demo_data.pdf"));
        int numberOfPages = document.getNumberOfPages();
        if (numberOfPages > 0) {

            PDPage page = (PDPage) document.getPages().get(0);
            System.out.println(page.getContents());
        }
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        stripper.setSortByPosition(true);
        Rectangle rect = new Rectangle(3, 1, 600, 6000);
        stripper.addRegion("class1", rect);
        PDPage firstPage = document.getPage(0);
        stripper.extractRegions(firstPage);
        System.out.println("Text in the area:" + rect);
        System.out.println(stripper.getTextForRegion("class1"));
    } finally {
        if (document != null) {
            document.close();
        }
    }
    //       }
}

From source file:net.bookinaction.ExtractAnnotations.java

License:Apache License

public void doJob(String job, Float[] pA) throws IOException {

    PDDocument document = null;//from  www.j a  v  a2  s. co  m

    Stamper s = new Stamper(); // utility class

    final String job_file = job + ".pdf";
    final String dic_file = job + "-dict.txt";
    final String new_job = job + "-new.pdf";

    PrintWriter writer = new PrintWriter(dic_file);

    ImageLocationListener imageLocationsListener = new ImageLocationListener();
    AnnotationMaker annotMaker = new AnnotationMaker();

    try {
        document = PDDocument.load(new File(job_file));

        int pageNum = 0;
        for (PDPage page : document.getPages()) {
            pageNum++;

            PDRectangle cropBox = page.getCropBox();

            List<PDAnnotation> annotations = page.getAnnotations();

            // extract image locations
            List<Rectangle2D> imageRects = new ArrayList<Rectangle2D>();
            imageLocationsListener.setImageRects(imageRects);
            imageLocationsListener.processPage(page);

            int im = 0;
            for (Rectangle2D pdImageRect : imageRects) {
                s.recordImage(writer, pageNum, "[im" + im + "]", (Rectangle2D.Float) pdImageRect);
                annotations.add(annotMaker.squareAnnotation(Color.YELLOW, (Rectangle2D.Float) pdImageRect,
                        "[im" + im + "]"));
                im++;
            }

            PDFTextStripperByArea stripper = new PDFTextStripperByArea();

            int j = 0;
            List<PDAnnotation> viableAnnots = new ArrayList();

            for (PDAnnotation annot : annotations) {
                if (annot instanceof PDAnnotationTextMarkup || annot instanceof PDAnnotationLink) {

                    stripper.addRegion(Integer.toString(j++), s.getAwtRect(
                            s.adjustedRect(annot.getRectangle(), pA[0], pA[1], pA[2], pA[3]), cropBox));
                    viableAnnots.add(annot);

                } else if (annot instanceof PDAnnotationPopup || annot instanceof PDAnnotationText) {
                    viableAnnots.add(annot);

                }
            }

            stripper.extractRegions(page);

            List<PDRectangle> rects = new ArrayList<PDRectangle>();

            List<String> comments = new ArrayList<String>();
            List<String> highlightTexts = new ArrayList<String>();

            j = 0;
            for (PDAnnotation viableAnnot : viableAnnots) {

                if (viableAnnot instanceof PDAnnotationTextMarkup) {
                    String highlightText = stripper.getTextForRegion(Integer.toString(j++));
                    String withoutCR = highlightText.replace((char) 0x0A, '^');

                    String comment = viableAnnot.getContents();

                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    PDRectangle aRect = s.adjustedRect(viableAnnot.getRectangle(), pA[4], pA[5], pA[6], pA[7]);
                    rects.add(aRect);
                    comments.add(comment);
                    highlightTexts.add(highlightText);

                    s.recordTextMarkup(writer, pageNum, comment, withoutCR, aRect, colorString);

                } else if (viableAnnot instanceof PDAnnotationText) {
                    String comment = viableAnnot.getContents();
                    String colorString = String.format("%06x", viableAnnot.getColor().toRGB());

                    for (Rectangle2D pdImageRect : imageRects) {
                        if (pdImageRect.contains(viableAnnot.getRectangle().getLowerLeftX(),
                                viableAnnot.getRectangle().getLowerLeftY())) {
                            s.recordTextMarkup(writer, pageNum, comment, "", (Rectangle2D.Float) pdImageRect,
                                    colorString);
                            annotations.add(annotMaker.squareAnnotation(Color.GREEN,
                                    (Rectangle2D.Float) pdImageRect, comment));
                        }
                        ;
                    }
                }
            }
            PDPageContentStream canvas = new PDPageContentStream(document, page, true, true, true);

            int i = 0;
            for (PDRectangle pdRect : rects) {
                String comment = comments.get(i);
                String highlightText = highlightTexts.get(i);
                //annotations.add(linkAnnotation(pdRect, comment, highlightText));
                //annotations.add(annotationSquareCircle(pdRect, BLUE));
                s.showBox(canvas, new Rectangle2D.Float(pdRect.getLowerLeftX(), pdRect.getUpperRightY(),
                        pdRect.getWidth(), pdRect.getHeight()), cropBox, Color.BLUE);

                i++;
            }
            canvas.close();
        }
        writer.close();
        document.save(new_job);

    } finally {
        if (document != null) {
            document.close();
        }

    }

}

From source file:org.xwiki.test.misc.PDFTest.java

License:Open Source License

/**
 * Code adapted from http://www.docjar.com/html/api/org/apache/pdfbox/examples/pdmodel/PrintURLs.java.html
 *//*from   w  w w  .j  a v  a 2  s  .  c om*/
private Map<String, PDAction> extractLinks(PDPage page) throws Exception {
    Map<String, PDAction> links = new HashMap<String, PDAction>();
    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
    List<PDAnnotation> annotations = page.getAnnotations();
    // First setup the text extraction regions.
    for (int j = 0; j < annotations.size(); j++) {
        PDAnnotation annotation = annotations.get(j);
        if (annotation instanceof PDAnnotationLink) {
            PDAnnotationLink link = (PDAnnotationLink) annotation;
            PDRectangle rect = link.getRectangle();
            // Need to reposition link rectangle to match text space.
            float x = rect.getLowerLeftX();
            float y = rect.getUpperRightY();
            float width = rect.getWidth();
            float height = rect.getHeight();
            int rotation = page.getRotation();
            if (rotation == 0) {
                PDRectangle pageSize = page.getMediaBox();
                y = pageSize.getHeight() - y;
            } else if (rotation == 90) {
                // Do nothing.
            }

            Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
            stripper.addRegion(String.valueOf(j), awtRect);
        }
    }

    stripper.extractRegions(page);

    for (int j = 0; j < annotations.size(); j++) {
        PDAnnotation annotation = annotations.get(j);
        if (annotation instanceof PDAnnotationLink) {
            PDAnnotationLink link = (PDAnnotationLink) annotation;
            String label = stripper.getTextForRegion(String.valueOf(j)).trim();
            links.put(label, link.getAction());
        }
    }

    return links;
}

From source file:org.xwiki.test.misc.PDFTest.java

License:Open Source License

private String getDestinationText(PDPageXYZDestination destination) throws Exception {
    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
    stripper.addRegion("destination", getRectangleBelowDestination(destination));
    stripper.extractRegions(destination.getPage());
    return stripper.getTextForRegion("destination").trim();
}

From source file:pdfconverter.converter3.java

@SuppressWarnings("deprecation")

public static void main(String[] args) throws IOException, WriteException {
    workbook = Workbook.createWorkbook(new File(output));
    System.out.println("File created");
    WritableSheet sheet = workbook.createSheet("Page", 0);
    ExcelStart(sheet);//from  ww w. j a v  a  2s  . co  m

    //Scanner user_input = new Scanner( System.in );
    File dir = new File(path);
    //System.out.println(dir.getPath());
    File[] dirList = dir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".pdf");
        }
    });

    int counter = 1;
    PDDocument pd;
    PDFTextStripper stripper = new PDFTextStripper();
    PDFTextStripperByArea areaSearch = new PDFTextStripperByArea();
    PDFTextStripperByArea stripper2 = new PDFTextStripperByArea();
    PDFTextStripperByArea stripper3 = new PDFTextStripperByArea();
    //PDRectangle rect = new PDRectangle(0, 0, 100, 100);
    stripper.setStartPage(1); //Start extracting from page 3
    stripper.setEndPage(1); //Extract till page 5
    File f = new File(dirList[0].getPath());

    pd = PDDocument.load(f);
    //int curHeight = 136;
    //int rowCount = 37;
    int curHeight = 116;
    int rowCount = 39;
    int rowHeight = 9;
    int sheetRowCount = 0;
    int pageStop = 1491;

    for (int curpage = 800; curpage < pageStop; curpage++) {
        if (counter > 800) {
            break;
        }
        PDPage page = pd.getPage(curpage);

        System.out.println("Now parsing page " + curpage);
        for (int curRow = 0; curRow < 80; curRow++) {
            Rectangle2D.Float cell = new Rectangle2D.Float(0, curHeight, 80, rowHeight);
            String name = "cell-1-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            String text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 0, sheetRowCount + 1);

            cell = new Rectangle2D.Float(80, curHeight, 30, rowHeight);
            name = "cell-2-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 1, sheetRowCount + 1);

            cell = new Rectangle2D.Float(110, curHeight, 40, rowHeight);
            name = "cell-3-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 2, sheetRowCount + 1);

            cell = new Rectangle2D.Float(150, curHeight, 120, rowHeight);
            name = "cell-4-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 3, sheetRowCount + 1);

            cell = new Rectangle2D.Float(270, curHeight, 120, rowHeight);
            name = "cell-5-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 4, sheetRowCount + 1);

            cell = new Rectangle2D.Float(390, curHeight, 40, rowHeight);
            name = "cell-6-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 5, sheetRowCount + 1);

            cell = new Rectangle2D.Float(430, curHeight, 46, rowHeight);
            name = "cell-7-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 6, sheetRowCount + 1);

            cell = new Rectangle2D.Float(476, curHeight, 82, rowHeight);
            name = "cell-8-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 7, sheetRowCount + 1);

            cell = new Rectangle2D.Float(558, curHeight, 65, rowHeight);
            name = "cell-9-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 8, sheetRowCount + 1);

            cell = new Rectangle2D.Float(623, curHeight, 66, rowHeight);
            name = "cell-10-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 9, sheetRowCount + 1);

            cell = new Rectangle2D.Float(689, curHeight, 100, rowHeight);
            name = "cell-11-" + curRow;
            areaSearch.addRegion(name, cell);
            areaSearch.extractRegions(page);
            text = areaSearch.getTextForRegion(name);
            areaSearch.removeRegion(name);
            AddCell(sheet, text, 10, sheetRowCount + 1);

            sheetRowCount++;
            curHeight += rowHeight;
        }

        //Rectangle2D.Float issueDate = new Rectangle2D.Float(0, 0, 80, page.getMediaBox().getHeight());
        //stripper2.addRegion("issueDate", issueDate);
        //Rectangle2D.Float amount = new Rectangle2D.Float(80, 0, 30, page.getMediaBox().getHeight());
        //stripper2.addRegion("amount", amount);
        //Rectangle2D.Float citation = new Rectangle2D.Float(110, 0, 40, page.getMediaBox().getHeight());
        //stripper2.addRegion("citation", citation);
        //Rectangle2D.Float violation = new Rectangle2D.Float(150, 0, 120, page.getMediaBox().getHeight());
        //stripper2.addRegion("violation", violation);
        //Rectangle2D.Float comment = new Rectangle2D.Float(270, 0, 120, page.getMediaBox().getHeight());
        //stripper2.addRegion("comment", comment);
        //Rectangle2D.Float warning = new Rectangle2D.Float(390, 0, 40, page.getMediaBox().getHeight());
        //stripper2.addRegion("warning", warning);
        //Rectangle2D.Float license = new Rectangle2D.Float(430, 0, 46, page.getMediaBox().getHeight());
        //stripper2.addRegion("license", license);
        //Rectangle2D.Float lot = new Rectangle2D.Float(476, 0, 82, page.getMediaBox().getHeight());
        //stripper2.addRegion("lot", lot);
        //Rectangle2D.Float make = new Rectangle2D.Float(558, 0, 65, page.getMediaBox().getHeight());
        //stripper2.addRegion("make", make);
        //Rectangle2D.Float officer = new Rectangle2D.Float(623, 0, 66, page.getMediaBox().getHeight());
        //stripper2.addRegion("officer", officer);
        //Rectangle2D.Float state = new Rectangle2D.Float(689, 0, 100, page.getMediaBox().getHeight());
        //stripper2.addRegion("state", state);

        //stripper2.extractRegions(page);
        //String text = stripper2.getTextForRegion("license");

        //Rectangle2D.Float row = new Rectangle2D.Float(0, 156, 80, 10);
        //stripper3.addRegion("row", row);
        //stripper3.extractRegions(page);
        //String text = stripper3.getTextForRegion("row");
        //System.out.println(text);
        counter++;
        curHeight = 116;
        rowCount = 39;
    }
    //AddRow(sheet, text, counter);
    //counter++;
    pd.close();

    System.out.println("Data extracted to Excel, parsing through Excel data...");

    boolean multiline = true;
    while (multiline) {
        multiline = false;
        for (int row = 0; row < sheet.getRows(); row++) {
            Cell cell = sheet.getCell(0, row);
            if (cell.getContents().length() < 5) {
                multiline = true;
                WritableCell cell2 = sheet.getWritableCell(4, row - 1);
                WritableCell cell3 = sheet.getWritableCell(4, row);
                String content = cell2.getContents() + cell3.getContents();
                content = content.replace("\n", "").replace("\r", "");
                Label l = (Label) cell2;
                l.setString(content);
                sheet.removeRow(row);
            }
        }
    }

    System.out.println("Data extraction complete");
    workbook.write();
    workbook.close();
}

From source file:uk.ac.leeds.ccg.andyt.rdl.web.RDL_ParsePDF.java

/**
 * https://svn.apache.org/viewvc/pdfbox/trunk/examples/ Based on
 * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintURLs.java?view=markup&pathrev=1703066
 *
 * @param f/*from ww  w.j a  v a  2s  .c  o  m*/
 * @param filter
 * @param fis
 * @return
 * @throws IOException
 * @throws TikaException
 * @throws SAXException
 */
public static ArrayList<String[]> parseForLinks(File f, String filter, FileInputStream fis)
        throws IOException, TikaException, SAXException {
    ArrayList<String[]> result;
    result = new ArrayList<String[]>();
    PDDocument doc = PDDocument.load(f);
    int pageNum = 0;
    for (PDPage page : doc.getPages()) {
        pageNum++;

        //            if (pageNum == 11) { //Degug test hack
        System.out.println("Parsing page " + pageNum);
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        List<PDAnnotation> annotations = page.getAnnotations();
        //first setup text extraction regions
        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDRectangle rect = link.getRectangle();
                //need to reposition link rectangle to match text space
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();
                int rotation = page.getRotation();
                if (rotation == 0) {
                    PDRectangle pageSize = page.getMediaBox();
                    y = pageSize.getHeight() - y;
                } else if (rotation == 90) {
                    //do nothing
                }

                //Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                // Rounding here could be a problem!
                Rectangle2D.Double awtRect = new Rectangle2D.Double(x, y, width, height);
                stripper.addRegion("" + j, awtRect);
            }
        }

        stripper.extractRegions(page);

        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDAction action = link.getAction();
                if (action == null) {
                    System.out.println(link.getContents());
                    System.out.println(annot.getClass().getName());
                    System.out.println(annot.getAnnotationName());
                    //System.out.println(annot.getNormalAppearanceStream().toString());
                    System.out.println(annot.getContents());
                    System.out.println(annot.getSubtype());
                } else {
                    String urlText = stripper.getTextForRegion("" + j);
                    if (action instanceof PDActionURI) {
                        PDActionURI uri = (PDActionURI) action;
                        String url;
                        url = uri.getURI();
                        if (url.contains(filter)) {
                            String[] partResult;
                            partResult = new String[3];
                            partResult[0] = "Page " + pageNum;
                            partResult[1] = "urlText " + urlText;
                            partResult[2] = "URL " + uri.getURI();
                            System.out.println(partResult[0]);
                            System.out.println(partResult[1]);
                            System.out.println(partResult[2]);
                            System.out.println("URL " + uri.getURI());
                            result.add(partResult);
                        } else {
                            System.out.println("URL " + uri.getURI());
                        }
                    } else {
                        System.out.println(action.getType());
                    }
                }
            } else {
                System.out.println(annot.getClass().getName());
                System.out.println(annot.getAnnotationName());
                System.out.println(annot.getContents());
                System.out.println(annot.getSubtype());
            }
        }

        //}
    }
    //       PDDocument doc = PDDocument.load(f);
    //        int pageNum = 0;
    //        for (PDPage page : doc.getPages()) {
    //            pageNum++;
    //            List<PDAnnotation> annotations = page.getAnnotations();
    //
    //            for (PDAnnotation annotation : annotations) {
    //                PDAnnotation annot = annotation;
    //                if (annot instanceof PDAnnotationLink) {
    //                    PDAnnotationLink link = (PDAnnotationLink) annot;
    //                    PDAction action = link.getAction();
    //                    if (action instanceof PDActionURI) {
    //                        PDActionURI uri = (PDActionURI) action;
    //                        String oldURI = uri.getURI();
    //                        String name = annot.getAnnotationName();
    //                        String contents = annot.getContents();
    //                        PDAppearanceStream a = annot.getNormalAppearanceStream();
    //                        //String newURI = "http://pdfbox.apache.org";
    //                        System.out.println(oldURI + " " + name + " " + contents);
    //                        //uri.setURI(newURI);
    //                    }
    //                }
    //            }
    //        }

    //        result = parseWithTika(fis);
    //XMPSchema schema;
    //schema = new XMPSchema();
    //List<String> XMPBagOrSeqList;
    //XMPBagOrSeqList = getXMPBagOrSeqList(XMPSchema schema, String name) {

    //        PDDocument tPDDocument;
    //        tPDDocument = PDDocument.load(f);
    //        COSDocument tCOSDocument;
    //        tCOSDocument = tPDDocument.getDocument();

    //        String header;
    //        header = tCOSDocument.getHeaderString();
    //        System.out.println(header);

    //        PDDocumentCatalog tPDDocumentCatalog;
    //        tPDDocumentCatalog = tPDDocument.getDocumentCatalog();
    //        PDDocumentNameDictionary tPDDocumentNameDictionary;
    //        tPDDocumentNameDictionary = tPDDocumentCatalog.getNames();

    //        COSDictionary tCOSDictionary;
    //        tCOSDictionary = tPDDocumentNameDictionary.getCOSDictionary();
    //tCOSDictionary.
    //        PDPageNode tPDPageNode;
    //        tPDPageNode = tPDDocumentCatalog.getPages();

    //        List<COSObject> tCOSObjects;
    //        tCOSObjects = tCOSDocument.getObjects();
    //        int n;
    //        n = tCOSObjects.size();
    //        System.out.println(n);
    //        COSObject aCOSObject;
    //        String s;
    //        for (int i = 0; i < n; i++) {
    //            aCOSObject = tCOSObjects.get(i);
    //            s = aCOSObject.toString();
    //            System.out.println(s);
    //        }

    //        XMPMetadata tXMPMetadata;
    //        tXMPMetadata = getXMPMetadata(tPDDocument);

    //        Document XMPDocument;
    //        XMPDocument = tXMPMetadata.getXMPDocument();
    //        Node n;
    //        n = XMPDocument.getFirstChild();
    //        parseNode(n);
    return result;
}

From source file:uk.ac.leeds.ccg.andyt.rdl.web.RDL_ParsePDF.java

/**
 * Converts PDF to a String a page at a time.
 *
 * @param f/* www . j  a  v a 2  s.co m*/
 * @return
 * @throws IOException
 */
public static String parseToString(File f) throws IOException {
    String result;
    result = "";
    PDDocument doc = PDDocument.load(f);
    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
    stripper.setSortByPosition(true);
    //Rectangle rect = new Rectangle(10, 280, 275, 60);
    //PDPage firstPage = doc.getPage(0);
    for (PDPage page : doc.getPages()) {
        PDRectangle aPDRectangle;
        aPDRectangle = page.getBBox();
        Rectangle2D.Double rect = new Rectangle2D.Double(aPDRectangle.getLowerLeftX(),
                aPDRectangle.getLowerLeftY(),
                //aPDRectangle.getUpperRightY(),
                aPDRectangle.getWidth(), aPDRectangle.getHeight());
        stripper.addRegion("class1", rect);
        stripper.extractRegions(page);
        System.out.println("<Text in the area:" + rect + ">");
        String text;
        text = stripper.getTextForRegion("class1");
        System.out.println(text);
        System.out.println("</Text in the area:" + rect + ">");
        result += text;
    }
    return result;
}

From source file:uk.org.openeyes.PDFFunctions.java

private String getTextArea(PDPage page, Rectangle titleArea) throws IOException {
    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
    stripper.setSortByPosition(true);/*from w w w.j  a v  a2s.c  o m*/
    stripper.addRegion("area", titleArea);
    stripper.extractRegions(page);
    return stripper.getTextForRegion("area");
}