Example usage for org.apache.poi.openxml4j.opc PackageRelationship getTargetURI

List of usage examples for org.apache.poi.openxml4j.opc PackageRelationship getTargetURI

Introduction

On this page you can find example usages of the getTargetURI() method of org.apache.poi.openxml4j.opc.PackageRelationship.

Prototype

public URI getTargetURI() 

Source Link

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java

License:Apache License

/**
 * In PowerPoint files, slides have things embedded in them, and slide
 * drawings which have the images/*w  ww .  j  av  a 2s. c o m*/
 */
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
    List<PackagePart> parts = new ArrayList<PackagePart>();
    XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
    XSLFSlideShow document = null;
    try {
        document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future
    } catch (Exception e) {
        throw new TikaException(e.getMessage()); // Shouldn't happen
    }

    for (CTSlideIdListEntry ctSlide : document.getSlideReferences().getSldIdList()) {
        // Add the slide
        PackagePart slidePart;
        try {
            slidePart = document.getSlidePart(ctSlide);
        } catch (IOException e) {
            throw new TikaException("Broken OOXML file", e);
        } catch (XmlException xe) {
            throw new TikaException("Broken OOXML file", xe);
        }
        parts.add(slidePart);

        // If it has drawings, return those too
        try {
            for (PackageRelationship rel : slidePart
                    .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
                if (rel.getTargetMode() == TargetMode.INTERNAL) {
                    PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
                    parts.add(rel.getPackage().getPart(relName));
                }
            }
        } catch (InvalidFormatException e) {
            throw new TikaException("Broken OOXML file", e);
        }
    }

    return parts;
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java

License:Apache License

/**
 * In Excel files, sheets have things embedded in them, and sheet drawings
 * which have the images/*from   ww w. j  a v a2 s. com*/
 */
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
    List<PackagePart> parts = new ArrayList<PackagePart>();
    for (PackagePart part : sheetParts) {
        // Add the sheet
        parts.add(part);

        // If it has drawings, return those too
        try {
            for (PackageRelationship rel : part.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation())) {
                if (rel.getTargetMode() == TargetMode.INTERNAL) {
                    PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
                    parts.add(rel.getPackage().getPart(relName));
                }
            }
            for (PackageRelationship rel : part
                    .getRelationshipsByType(XSSFRelation.VML_DRAWINGS.getRelation())) {
                if (rel.getTargetMode() == TargetMode.INTERNAL) {
                    PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
                    parts.add(rel.getPackage().getPart(relName));
                }
            }
        } catch (InvalidFormatException e) {
            throw new TikaException("Broken OOXML file", e);
        }
    }

    return parts;
}

From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java

License:Apache License

/**
 * Appends to {@code parts} the targets of the slide's internal VML drawing,
 * slide layout, notes master and notes relationships, followed by the slide
 * part itself.
 *
 * @param slidePart the slide whose related parts are collected
 * @param parts     the list that receives the related parts and the slide
 */
private void addSlideParts(PackagePart slidePart, List<PackagePart> parts) {
    final String[] relationTypes = {
            XSLFRelation.VML_DRAWING.getRelation(),
            XSLFRelation.SLIDE_LAYOUT.getRelation(),
            XSLFRelation.NOTES_MASTER.getRelation(),
            XSLFRelation.NOTES.getRelation() };

    for (String relationType : relationTypes) {
        try {
            for (PackageRelationship rel : slidePart.getRelationshipsByType(relationType)) {
                // External targets are not parts of this package
                if (rel.getTargetMode() != TargetMode.INTERNAL) {
                    continue;
                }
                PackagePartName targetName = PackagingURIHelper.createPartName(rel.getTargetURI());
                parts.add(rel.getPackage().getPart(targetName));
            }
        } catch (InvalidFormatException ignored) {
            // Best effort: stop processing this relationship type and move
            // on to the next one.
        }
    }

    // The slide itself always goes last
    parts.add(slidePart);
}

From source file:org.apache.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java

License:Apache License

/**
 * Collects the package parts that make up the main content of a PowerPoint
 * file: every slide part and, for each slide, the parts targeted by its
 * internal VML drawing relationships (slide drawings carry the images).
 *
 * @return the slide parts, each followed by the targets of its internal
 *         VML drawing relationships; empty when there is no slide id list
 * @throws TikaException if the slide show cannot be opened, or if a slide
 *         or drawing part cannot be resolved
 */
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
    List<PackagePart> parts = new ArrayList<>();
    XSLFSlideShow document = null;
    try {
        document = new XSLFSlideShow(extractor.getPackage());
    } catch (Exception e) {
        // Shouldn't happen; keep the original message but chain the cause so
        // the underlying stack trace is not lost.
        throw new TikaException(e.getMessage(), e);
    }

    CTSlideIdList ctSlideIdList = document.getSlideReferences();
    if (ctSlideIdList != null) {
        for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) {
            CTSlideIdListEntry ctSlide = ctSlideIdList.getSldIdArray(i);
            // Add the slide
            PackagePart slidePart;
            try {
                slidePart = document.getSlidePart(ctSlide);
            } catch (IOException | XmlException e) {
                throw new TikaException("Broken OOXML file", e);
            }
            parts.add(slidePart);

            // If it has drawings, return those too
            try {
                for (PackageRelationship rel : slidePart
                        .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
                    // Only internal targets are parts of this package
                    if (rel.getTargetMode() == TargetMode.INTERNAL) {
                        PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
                        parts.add(rel.getPackage().getPart(relName));
                    }
                }
            } catch (InvalidFormatException e) {
                throw new TikaException("Broken OOXML file", e);
            }
        }
    }
    return parts;
}

From source file:org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor.java

License:Apache License

/**
 * Builds a lookup from relationship id to target URI string for the
 * hyperlink relationships of {@code bodyPart}.
 *
 * Relationships that are null, or that lack an id or a target URI, are
 * skipped. If the relationships cannot be read because of an
 * {@link InvalidFormatException}, whatever was collected so far is returned.
 *
 * @param bodyPart the part whose hyperlink relationships are read
 * @return a map of relationship id to target URI string, never null
 */
private Map<String, String> loadHyperlinkRelationships(PackagePart bodyPart) {
    Map<String, String> hyperlinks = new HashMap<>();
    try {
        PackageRelationshipCollection relationships = bodyPart
                .getRelationshipsByType(XWPFRelation.HYPERLINK.getRelation());
        for (PackageRelationship relationship : relationships) {
            if (relationship == null) {
                continue;
            }
            String relationshipId = relationship.getId();
            String target = null;
            if (relationship.getTargetURI() != null) {
                target = relationship.getTargetURI().toString();
            }
            if (relationshipId == null || target == null) {
                continue;
            }
            hyperlinks.put(relationshipId, target);
        }
    } catch (InvalidFormatException ignored) {
        // Best effort: fall through and return what has been collected.
    }
    return hyperlinks;
}

From source file:org.openflexo.docxparser.OpenXml2Html.java

License:Open Source License

/**
 * Converts a {@code w:hyperlink} element to HTML.
 *
 * The link target is taken from the relationship referenced by the
 * element's {@code r:id} attribute; if that yields nothing, the
 * {@code w:anchor} attribute is used as an in-document anchor
 * ({@code #anchor}). When a target is found, the children are wrapped in an
 * {@code <a>} tag; otherwise a warning is logged and the children are
 * emitted without a surrounding link.
 *
 * @param element the element to convert; must be a {@code w:hyperlink}
 * @return the HTML generated for the element and its children
 * @throws InvalidElementException if {@code element} is not a {@code w:hyperlink}
 */
private ParsedHtml getHtmlFromW_HyperlinkElement(Element element) throws InvalidElementException {
    if (OpenXmlTag.getOpenXmlTag(element) != OpenXmlTag.w_hyperlink) {
        throw new InvalidElementException(
                "Cannot transform element to html, expecting element w:hyperlink and get '"
                        + element.getQualifiedName() + "'");
    }

    ParsedHtml parsedHtml = new ParsedHtml();

    String href = null;

    String linkRid = element.attributeValue(DocxQName.getQName(OpenXmlTag.r_id));
    if (linkRid != null) {
        PackageRelationship linkRelationship = documentPart.getRelationship(linkRid);
        // Guard against a relationship without a target instead of failing with an NPE
        if (linkRelationship != null && linkRelationship.getTargetURI() != null) {
            href = linkRelationship.getTargetURI().toString();
        }
    }

    if (href == null) { // Anchor ?
        String anchor = element.attributeValue(DocxQName.getQName(OpenXmlTag.w_anchor));
        if (anchor != null) {
            href = "#" + anchor;
        }
    }

    String closeTag;

    if (href != null) {
        String target = element.attributeValue(DocxQName.getQName(OpenXmlTag.w_tgtFrame));
        String title = element.attributeValue(DocxQName.getQName(OpenXmlTag.w_tooltip));

        // The href comes from the document, so escape it like the other
        // attributes; an unescaped quote would break out of the attribute.
        parsedHtml.appendHtml("<a href=\"" + StringEscapeUtils.escapeHtml(href) + "\"");
        if (target != null) {
            parsedHtml.appendHtml(" target=\"" + StringEscapeUtils.escapeHtml(target) + "\"");
        }
        if (title != null) {
            parsedHtml.appendHtml(" title=\"" + StringEscapeUtils.escapeHtml(title) + "\"");
        }

        parsedHtml.appendHtml(">");

        closeTag = "</a>";
    } else {
        logger.log(Level.WARNING,
                "OpenXml to Html: cannot get hyperlink relationship with id '" + linkRid + "'");
        closeTag = "";
    }

    // Children are rendered whether or not a link target was found
    for (Iterator<?> iterator = element.elementIterator(); iterator.hasNext();) {
        Element childElement = (Element) iterator.next();
        parsedHtml.append(getRecursiveHtml(childElement));
    }

    parsedHtml.appendHtml(closeTag);

    return parsedHtml;
}

From source file:org.openflexo.docxparser.OpenXml2Html.java

License:Open Source License

/**
 * Converts a {@code w:drawing} element to an HTML {@code <img>} tag.
 *
 * The image is located through the {@code r:embed} attribute of the first
 * descendant {@code a:blip} element. When the image part exists, its bytes
 * are registered as a needed resource under the last path segment of the
 * part name. Width and height are taken from the {@code cx}/{@code cy}
 * attributes of {@code wp:inline/wp:extent}, converted by
 * {@code getEnglishMetricUnitInPixel}, when present.
 *
 * If the blip element, the embed attribute or the relationship is missing,
 * or if reading the image fails, a warning is logged and empty HTML is
 * returned.
 *
 * @param element the element to convert; must be a {@code w:drawing}
 * @return the HTML for the image, or empty HTML when it cannot be produced
 * @throws InvalidElementException if {@code element} is not a {@code w:drawing}
 */
private ParsedHtml getHtmlFromW_DrawingElement(Element element) {
    if (OpenXmlTag.getOpenXmlTag(element) != OpenXmlTag.w_drawing) {
        throw new InvalidElementException(
                "Cannot transform element to html, expecting element w:drawing and get '"
                        + element.getQualifiedName() + "'");
    }

    ParsedHtml parsedHtml = new ParsedHtml();

    try {
        Dom4jXPath xpath = new Dom4jXPath("descendant::a:blip");
        xpath.addNamespace(DocxXmlUtil.NAMESPACE_DRAWINGMAIN.getPrefix(),
                DocxXmlUtil.NAMESPACE_DRAWINGMAIN.getURI());

        Element ablipElement = (Element) xpath.selectSingleNode(element);
        if (ablipElement == null) {
            logger.warning("Cannot handle drawing tag: a:blip element not found");
            return parsedHtml;
        }

        String imageRid = ablipElement.attributeValue(DocxQName.getQName(OpenXmlTag.r_embed));
        if (imageRid == null) {
            logger.warning("Cannot handle drawing tag: r:embed attribute in a:blip element not found");
            return parsedHtml;
        }

        PackageRelationship imageRelationship = documentPart.getRelationship(imageRid);
        if (imageRelationship == null) {
            logger.warning("Cannot handle drawing tag: imageRelationship with id '" + imageRid + "' not found");
            return parsedHtml;
        }

        PackagePartName imagePartName = PackagingURIHelper.createPartName(imageRelationship.getTargetURI());
        PackagePart imagePart = documentPart.getPackage().getPart(imagePartName);
        String imageFileName;
        if (imagePart != null) {
            byte[] imageBytes;
            // Close the part's stream once the bytes are read so it is not
            // leaked; the fully qualified name avoids needing a new import.
            java.io.InputStream imageStream = imagePart.getInputStream();
            try {
                imageBytes = DocxXmlUtil.getByteArrayFromInputStream(imageStream);
            } finally {
                imageStream.close();
            }

            // Use the last path segment of the part name as the file name
            imageFileName = imagePartName.getName().substring(imagePartName.getName().lastIndexOf('/') + 1);

            parsedHtml.addNeededResource(new ParsedHtmlResource(imageFileName, imageBytes));
        } else {
            imageFileName = "";
        }
        Integer imageWidth = null;
        Integer imageHeight = null;

        Element extentElement = (Element) element.selectSingleNode("wp:inline/wp:extent");
        if (extentElement != null) {
            String imageCx = extentElement.attributeValue("cx");
            String imageCy = extentElement.attributeValue("cy");

            if (imageCx != null) {
                imageWidth = getEnglishMetricUnitInPixel(imageCx);
            }
            if (imageCy != null) {
                imageHeight = getEnglishMetricUnitInPixel(imageCy);
            }
        }

        parsedHtml.appendHtml("<img src=\"" + resourcesDirectory + imageFileName + "\"");
        if (imageWidth != null) {
            parsedHtml.appendHtml(" width=\"" + imageWidth + "\"");
        }
        if (imageHeight != null) {
            parsedHtml.appendHtml(" height=\"" + imageHeight + "\"");
        }
        parsedHtml.appendHtml(" />");

        return parsedHtml;
    } catch (InvalidFormatException e) {
        logger.log(Level.WARNING, "Cannot handle drawing tag: InvalidFormatException catched", e);
        return new ParsedHtml();
    } catch (IOException e) {
        logger.log(Level.WARNING, "Cannot handle drawing tag: IOException catched", e);
        return new ParsedHtml();
    } catch (JaxenException e) {
        logger.log(Level.WARNING, "Cannot handle drawing tag: JaxenException catched", e);
        return new ParsedHtml();
    }
}