List of usage examples for org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment#getAttachmentName()
public String getAttachmentName()
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTML.java
License:Apache License
@Override protected void endPage(PDPage page) throws IOException { try {//from w w w .jav a 2 s . c o m for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation; PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fann.getFile(); try { AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "source", "source", "CDATA", "annotation"); extractMultiOSPDEmbeddedFiles(fann.getAttachmentName(), fileSpec, attributes); } catch (SAXException e) { throw new IOExceptionWithCause("file embedded in annotation sax exception", e); } catch (TikaException e) { throw new IOExceptionWithCause("file embedded in annotation tika exception", e); } catch (IOException e) { handleCatchableIOE(e); } } else if (annotation instanceof PDAnnotationWidget) { handleWidget((PDAnnotationWidget) annotation); } // TODO: remove once PDFBOX-1143 is fixed: if (config.getExtractAnnotationText()) { if (annotation instanceof PDAnnotationLink) { PDAnnotationLink annotationlink = (PDAnnotationLink) annotation; if (annotationlink.getAction() != null) { PDAction action = annotationlink.getAction(); if (action instanceof PDActionURI) { //can't currently associate link to text. //for now, extract link and repeat the link as if it //were the visible text PDActionURI uri = (PDActionURI) action; String link = uri.getURI(); if (link != null && link.trim().length() > 0) { xhtml.startElement("div", "class", "annotation"); xhtml.startElement("a", "href", link); xhtml.characters(link); xhtml.endElement("a"); xhtml.endElement("div"); } } } } if (annotation instanceof PDAnnotationMarkup) { PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation; String title = annotationMarkup.getTitlePopup(); String subject = annotationMarkup.getSubject(); String contents = annotationMarkup.getContents(); // TODO: maybe also annotationMarkup.getRichContents()? 
if (title != null || subject != null || contents != null) { xhtml.startElement("div", "class", "annotation"); if (title != null) { xhtml.startElement("div", "class", "annotationTitle"); xhtml.characters(title); xhtml.endElement("div"); } if (subject != null) { xhtml.startElement("div", "class", "annotationSubject"); xhtml.characters(subject); xhtml.endElement("div"); } if (contents != null) { xhtml.startElement("div", "class", "annotationContents"); xhtml.characters(contents); xhtml.endElement("div"); } xhtml.endElement("div"); } } } } if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { doOCROnCurrentPage(); } PDPageAdditionalActions pageActions = page.getActions(); if (pageActions != null) { handleDestinationOrAction(pageActions.getC(), ActionTrigger.PAGE_CLOSE); handleDestinationOrAction(pageActions.getO(), ActionTrigger.PAGE_OPEN); } xhtml.endElement("div"); } catch (SAXException | TikaException e) { throw new IOExceptionWithCause("Unable to end a page", e); } catch (IOException e) { exceptions.add(e); } finally { pageIndex++; } }
From source file:org.apache.tika.parser.pdf.AbstractPDF2XHTMLPureJava.java
License:Apache License
@Override protected void endPage(PDPage page) throws IOException { try {//from w w w.ja va 2s .c om for (PDAnnotation annotation : page.getAnnotations()) { if (annotation instanceof PDAnnotationFileAttachment) { PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation; PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fann.getFile(); try { AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "source", "source", "CDATA", "annotation"); extractMultiOSPDEmbeddedFiles(fann.getAttachmentName(), fileSpec, attributes); } catch (SAXException e) { throw new IOExceptionWithCause("file embedded in annotation sax exception", e); } catch (TikaException e) { throw new IOExceptionWithCause("file embedded in annotation tika exception", e); } catch (IOException e) { handleCatchableIOE(e); } } else if (annotation instanceof PDAnnotationWidget) { handleWidget((PDAnnotationWidget) annotation); } // TODO: remove once PDFBOX-1143 is fixed: if (config.getExtractAnnotationText()) { if (annotation instanceof PDAnnotationLink) { PDAnnotationLink annotationlink = (PDAnnotationLink) annotation; if (annotationlink.getAction() != null) { PDAction action = annotationlink.getAction(); if (action instanceof PDActionURI) { //can't currently associate link to text. //for now, extract link and repeat the link as if it //were the visible text PDActionURI uri = (PDActionURI) action; String link = uri.getURI(); if (link != null && link.trim().length() > 0) { xhtml.startElement("div", "class", "annotation"); xhtml.startElement("a", "href", link); xhtml.characters(link); xhtml.endElement("a"); xhtml.endElement("div"); } } } } if (annotation instanceof PDAnnotationMarkup) { PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation; String title = annotationMarkup.getTitlePopup(); String subject = annotationMarkup.getSubject(); String contents = annotationMarkup.getContents(); // TODO: maybe also annotationMarkup.getRichContents()? 
if (title != null || subject != null || contents != null) { xhtml.startElement("div", "class", "annotation"); if (title != null) { xhtml.startElement("div", "class", "annotationTitle"); xhtml.characters(title); xhtml.endElement("div"); } if (subject != null) { xhtml.startElement("div", "class", "annotationSubject"); xhtml.characters(subject); xhtml.endElement("div"); } if (contents != null) { xhtml.startElement("div", "class", "annotationContents"); xhtml.characters(contents); xhtml.endElement("div"); } xhtml.endElement("div"); } } } } PDPageAdditionalActions pageActions = page.getActions(); if (pageActions != null) { handleDestinationOrAction(pageActions.getC(), ActionTrigger.PAGE_CLOSE); handleDestinationOrAction(pageActions.getO(), ActionTrigger.PAGE_OPEN); } xhtml.endElement("div"); } catch (SAXException | TikaException e) { throw new IOExceptionWithCause("Unable to end a page", e); } catch (IOException e) { exceptions.add(e); } finally { pageIndex++; } }