Example usage for org.apache.pdfbox.pdmodel.interactive.annotation PDAnnotationLink getContents

List of usage examples for org.apache.pdfbox.pdmodel.interactive.annotation PDAnnotationLink getContents

Introduction

In this page you can find the example usage for org.apache.pdfbox.pdmodel.interactive.annotation PDAnnotationLink getContents.

Prototype

public String getContents() 

Source Link

Document

Get the "contents" of the field.

Usage

From source file:uk.ac.leeds.ccg.andyt.rdl.web.RDL_ParsePDF.java

/**
 * https://svn.apache.org/viewvc/pdfbox/trunk/examples/ Based on
 * https://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/pdmodel/PrintURLs.java?view=markup&pathrev=1703066
 *
 * @param f//from  w  w  w .  j a  v a 2 s  . co  m
 * @param filter
 * @param fis
 * @return
 * @throws IOException
 * @throws TikaException
 * @throws SAXException
 */
public static ArrayList<String[]> parseForLinks(File f, String filter, FileInputStream fis)
        throws IOException, TikaException, SAXException {
    ArrayList<String[]> result;
    result = new ArrayList<String[]>();
    PDDocument doc = PDDocument.load(f);
    int pageNum = 0;
    for (PDPage page : doc.getPages()) {
        pageNum++;

        //            if (pageNum == 11) { //Degug test hack
        System.out.println("Parsing page " + pageNum);
        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
        List<PDAnnotation> annotations = page.getAnnotations();
        //first setup text extraction regions
        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDRectangle rect = link.getRectangle();
                //need to reposition link rectangle to match text space
                float x = rect.getLowerLeftX();
                float y = rect.getUpperRightY();
                float width = rect.getWidth();
                float height = rect.getHeight();
                int rotation = page.getRotation();
                if (rotation == 0) {
                    PDRectangle pageSize = page.getMediaBox();
                    y = pageSize.getHeight() - y;
                } else if (rotation == 90) {
                    //do nothing
                }

                //Rectangle2D.Float awtRect = new Rectangle2D.Float(x, y, width, height);
                // Rounding here could be a problem!
                Rectangle2D.Double awtRect = new Rectangle2D.Double(x, y, width, height);
                stripper.addRegion("" + j, awtRect);
            }
        }

        stripper.extractRegions(page);

        for (int j = 0; j < annotations.size(); j++) {
            PDAnnotation annot = annotations.get(j);
            if (annot instanceof PDAnnotationLink) {
                PDAnnotationLink link = (PDAnnotationLink) annot;
                PDAction action = link.getAction();
                if (action == null) {
                    System.out.println(link.getContents());
                    System.out.println(annot.getClass().getName());
                    System.out.println(annot.getAnnotationName());
                    //System.out.println(annot.getNormalAppearanceStream().toString());
                    System.out.println(annot.getContents());
                    System.out.println(annot.getSubtype());
                } else {
                    String urlText = stripper.getTextForRegion("" + j);
                    if (action instanceof PDActionURI) {
                        PDActionURI uri = (PDActionURI) action;
                        String url;
                        url = uri.getURI();
                        if (url.contains(filter)) {
                            String[] partResult;
                            partResult = new String[3];
                            partResult[0] = "Page " + pageNum;
                            partResult[1] = "urlText " + urlText;
                            partResult[2] = "URL " + uri.getURI();
                            System.out.println(partResult[0]);
                            System.out.println(partResult[1]);
                            System.out.println(partResult[2]);
                            System.out.println("URL " + uri.getURI());
                            result.add(partResult);
                        } else {
                            System.out.println("URL " + uri.getURI());
                        }
                    } else {
                        System.out.println(action.getType());
                    }
                }
            } else {
                System.out.println(annot.getClass().getName());
                System.out.println(annot.getAnnotationName());
                System.out.println(annot.getContents());
                System.out.println(annot.getSubtype());
            }
        }

        //}
    }
    //       PDDocument doc = PDDocument.load(f);
    //        int pageNum = 0;
    //        for (PDPage page : doc.getPages()) {
    //            pageNum++;
    //            List<PDAnnotation> annotations = page.getAnnotations();
    //
    //            for (PDAnnotation annotation : annotations) {
    //                PDAnnotation annot = annotation;
    //                if (annot instanceof PDAnnotationLink) {
    //                    PDAnnotationLink link = (PDAnnotationLink) annot;
    //                    PDAction action = link.getAction();
    //                    if (action instanceof PDActionURI) {
    //                        PDActionURI uri = (PDActionURI) action;
    //                        String oldURI = uri.getURI();
    //                        String name = annot.getAnnotationName();
    //                        String contents = annot.getContents();
    //                        PDAppearanceStream a = annot.getNormalAppearanceStream();
    //                        //String newURI = "http://pdfbox.apache.org";
    //                        System.out.println(oldURI + " " + name + " " + contents);
    //                        //uri.setURI(newURI);
    //                    }
    //                }
    //            }
    //        }

    //        result = parseWithTika(fis);
    //XMPSchema schema;
    //schema = new XMPSchema();
    //List<String> XMPBagOrSeqList;
    //XMPBagOrSeqList = getXMPBagOrSeqList(XMPSchema schema, String name) {

    //        PDDocument tPDDocument;
    //        tPDDocument = PDDocument.load(f);
    //        COSDocument tCOSDocument;
    //        tCOSDocument = tPDDocument.getDocument();

    //        String header;
    //        header = tCOSDocument.getHeaderString();
    //        System.out.println(header);

    //        PDDocumentCatalog tPDDocumentCatalog;
    //        tPDDocumentCatalog = tPDDocument.getDocumentCatalog();
    //        PDDocumentNameDictionary tPDDocumentNameDictionary;
    //        tPDDocumentNameDictionary = tPDDocumentCatalog.getNames();

    //        COSDictionary tCOSDictionary;
    //        tCOSDictionary = tPDDocumentNameDictionary.getCOSDictionary();
    //tCOSDictionary.
    //        PDPageNode tPDPageNode;
    //        tPDPageNode = tPDDocumentCatalog.getPages();

    //        List<COSObject> tCOSObjects;
    //        tCOSObjects = tCOSDocument.getObjects();
    //        int n;
    //        n = tCOSObjects.size();
    //        System.out.println(n);
    //        COSObject aCOSObject;
    //        String s;
    //        for (int i = 0; i < n; i++) {
    //            aCOSObject = tCOSObjects.get(i);
    //            s = aCOSObject.toString();
    //            System.out.println(s);
    //        }

    //        XMPMetadata tXMPMetadata;
    //        tXMPMetadata = getXMPMetadata(tPDDocument);

    //        Document XMPDocument;
    //        XMPDocument = tXMPMetadata.getXMPDocument();
    //        Node n;
    //        n = XMPDocument.getFirstChild();
    //        parseNode(n);
    return result;
}