List of usage examples for org.apache.poi.openxml4j.opc.PackageRelationship#getTargetURI
public URI getTargetURI()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * In PowerPoint files, slides have things embedded in them, and slide * drawings which have the images/*w ww . j av a 2s. c o m*/ */ @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<PackagePart>(); XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFSlideShow document = null; try { document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } for (CTSlideIdListEntry ctSlide : document.getSlideReferences().getSldIdList()) { // Add the slide PackagePart slidePart; try { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); } catch (XmlException xe) { throw new TikaException("Broken OOXML file", xe); } parts.add(slidePart); // If it has drawings, return those too try { for (PackageRelationship rel : slidePart .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } return parts; }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/** * In Excel files, sheets have things embedded in them, and sheet drawings * which have the images/*from ww w. j a v a2 s. com*/ */ @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<PackagePart>(); for (PackagePart part : sheetParts) { // Add the sheet parts.add(part); // If it has drawings, return those too try { for (PackageRelationship rel : part.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } for (PackageRelationship rel : part .getRelationshipsByType(XSSFRelation.VML_DRAWINGS.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } return parts; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
private void addSlideParts(PackagePart slidePart, List<PackagePart> parts) { for (String relation : new String[] { XSLFRelation.VML_DRAWING.getRelation(), XSLFRelation.SLIDE_LAYOUT.getRelation(), XSLFRelation.NOTES_MASTER.getRelation(), XSLFRelation.NOTES.getRelation() }) { try {/*from w w w.j a va 2 s . com*/ for (PackageRelationship packageRelationship : slidePart.getRelationshipsByType(relation)) { if (packageRelationship.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper .createPartName(packageRelationship.getTargetURI()); parts.add(packageRelationship.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { } } //and slide of course parts.add(slidePart); }
From source file:org.apache.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * In PowerPoint files, slides have things embedded in them, * and slide drawings which have the images */// w w w.ja va2 s . c o m @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<>(); XSLFSlideShow document = null; try { document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } CTSlideIdList ctSlideIdList = document.getSlideReferences(); if (ctSlideIdList != null) { for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) { CTSlideIdListEntry ctSlide = ctSlideIdList.getSldIdArray(i); // Add the slide PackagePart slidePart; try { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); } catch (XmlException xe) { throw new TikaException("Broken OOXML file", xe); } parts.add(slidePart); // If it has drawings, return those too try { for (PackageRelationship rel : slidePart .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } } return parts; }
From source file:org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor.java
License:Apache License
private Map<String, String> loadHyperlinkRelationships(PackagePart bodyPart) { Map<String, String> hyperlinks = new HashMap<>(); try {//from www .j av a2 s.c om PackageRelationshipCollection prc = bodyPart .getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation()); for (int i = 0; i < prc.size(); i++) { PackageRelationship pr = prc.getRelationship(i); if (pr == null) { continue; } String id = pr.getId(); String url = (pr.getTargetURI() == null) ? null : pr.getTargetURI().toString(); if (id != null && url != null) { hyperlinks.put(id, url); } } } catch (InvalidFormatException e) { } return hyperlinks; }
From source file:org.openflexo.docxparser.OpenXml2Html.java
License:Open Source License
private ParsedHtml getHtmlFromW_HyperlinkElement(Element element) throws InvalidElementException { if (OpenXmlTag.getOpenXmlTag(element) != OpenXmlTag.w_hyperlink) { throw new InvalidElementException( "Cannot transform element to html, expecting element w:hyperlink and get '" + element.getQualifiedName() + "'"); }//w ww . j a v a 2 s . c o m ParsedHtml parsedHtml = new ParsedHtml(); String href = null; String linkRid = element.attributeValue(DocxQName.getQName(OpenXmlTag.r_id)); if (linkRid != null) { PackageRelationship linkRelationship = documentPart.getRelationship(linkRid); if (linkRelationship != null) { href = linkRelationship.getTargetURI().toString(); } } if (href == null) { // Anchor ? String anchor = element.attributeValue(DocxQName.getQName(OpenXmlTag.w_anchor)); if (anchor != null) { href = "#" + anchor; } } String closeTag; if (href != null) { String target = element.attributeValue(DocxQName.getQName(OpenXmlTag.w_tgtFrame)); String title = element.attributeValue(DocxQName.getQName(OpenXmlTag.w_tooltip)); parsedHtml.appendHtml("<a href=\"" + href + "\""); if (target != null) { parsedHtml.appendHtml(" target=\"" + StringEscapeUtils.escapeHtml(target) + "\""); } if (title != null) { parsedHtml.appendHtml(" title=\"" + StringEscapeUtils.escapeHtml(title) + "\""); } parsedHtml.appendHtml(">"); closeTag = "</a>"; } else { logger.log(Level.WARNING, "OpenXml to Html: cannot get hyperlink relationship with id '" + linkRid + "'"); closeTag = ""; } for (Iterator<?> iterator = element.elementIterator(); iterator.hasNext();) { Element childElement = (Element) iterator.next(); parsedHtml.append(getRecursiveHtml(childElement)); } parsedHtml.appendHtml(closeTag); return parsedHtml; }
From source file:org.openflexo.docxparser.OpenXml2Html.java
License:Open Source License
/**
 * Converts a {@code w:drawing} element into an HTML {@code <img>} tag.
 *
 * <p>The image is located through the {@code r:embed} relationship id of the first
 * descendant {@code a:blip} element. When the image part exists, its bytes are
 * registered as a needed resource under the last path segment of the part name. The
 * optional {@code wp:inline/wp:extent} element supplies the width and height
 * attributes. If the blip, its relationship id or the relationship is missing, or an
 * exception is caught while processing, a warning is logged and empty HTML is returned.
 *
 * @param element the element to convert; must be a {@code w:drawing}
 * @return the HTML for the image, or empty HTML when the drawing cannot be handled
 */
private ParsedHtml getHtmlFromW_DrawingElement(Element element) {
    if (OpenXmlTag.getOpenXmlTag(element) != OpenXmlTag.w_drawing) {
        throw new InvalidElementException(
                "Cannot transform element to html, expecting element w:drawing and get '"
                        + element.getQualifiedName() + "'");
    }

    ParsedHtml html = new ParsedHtml();
    try {
        // Locate the a:blip element that references the embedded image
        Dom4jXPath blipPath = new Dom4jXPath("descendant::a:blip");
        blipPath.addNamespace(DocxXmlUtil.NAMESPACE_DRAWINGMAIN.getPrefix(),
                DocxXmlUtil.NAMESPACE_DRAWINGMAIN.getURI());
        Element blip = (Element) blipPath.selectSingleNode(element);
        if (blip == null) {
            logger.warning("Cannot handle drawing tag: a:blip element not found");
            return html;
        }

        String imageRid = blip.attributeValue(DocxQName.getQName(OpenXmlTag.r_embed));
        if (imageRid == null) {
            logger.warning("Cannot handle drawing tag: r:embed attribute in a:blip element not found");
            return html;
        }

        PackageRelationship relationship = documentPart.getRelationship(imageRid);
        if (relationship == null) {
            logger.warning("Cannot handle drawing tag: imageRelationship with id '" + imageRid + "' not found");
            return html;
        }

        // Resolve the image part and register its content as a resource
        PackagePartName partName = PackagingURIHelper.createPartName(relationship.getTargetURI());
        PackagePart part = documentPart.getPackage().getPart(partName);
        String imageFileName = "";
        if (part != null) {
            byte[] content = DocxXmlUtil.getByteArrayFromInputStream(part.getInputStream());
            String fullName = partName.getName();
            imageFileName = fullName.substring(fullName.lastIndexOf('/') + 1);
            html.addNeededResource(new ParsedHtmlResource(imageFileName, content));
        }

        // Optional dimensions, given in the extent element's cx/cy attributes
        Integer widthPx = null;
        Integer heightPx = null;
        Element extent = (Element) element.selectSingleNode("wp:inline/wp:extent");
        if (extent != null) {
            String cx = extent.attributeValue("cx");
            String cy = extent.attributeValue("cy");
            if (cx != null) {
                widthPx = getEnglishMetricUnitInPixel(cx);
            }
            if (cy != null) {
                heightPx = getEnglishMetricUnitInPixel(cy);
            }
        }

        html.appendHtml("<img src=\"" + resourcesDirectory + imageFileName + "\"");
        if (widthPx != null) {
            html.appendHtml(" width=\"" + widthPx + "\"");
        }
        if (heightPx != null) {
            html.appendHtml(" height=\"" + heightPx + "\"");
        }
        html.appendHtml(" />");
        return html;
    } catch (InvalidFormatException e) {
        logger.log(Level.WARNING, "Cannot handle drawing tag: InvalidFormatException catched", e);
        return new ParsedHtml();
    } catch (IOException e) {
        logger.log(Level.WARNING, "Cannot handle drawing tag: IOException catched", e);
        return new ParsedHtml();
    } catch (JaxenException e) {
        logger.log(Level.WARNING, "Cannot handle drawing tag: JaxenException catched", e);
        return new ParsedHtml();
    }
}