List of usage examples for org.apache.poi.openxml4j.opc PackagePart getRelatedPart
public PackagePart getRelatedPart(PackageRelationship rel) throws InvalidFormatException
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.java
License:Apache License
private void handleEmbeddedParts(ContentHandler handler) throws TikaException, IOException, SAXException { try {//from www. jav a 2s .c o m for (PackagePart source : getMainDocumentParts()) { for (PackageRelationship rel : source.getRelationships()) { URI sourceURI = rel.getSourceURI(); String sourceDesc; if (sourceURI != null) { sourceDesc = getJustFileName(sourceURI.getPath()); if (sourceDesc.startsWith("slide")) { sourceDesc += "_"; } else { sourceDesc = ""; } } else { sourceDesc = ""; } if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePart target; try { target = source.getRelatedPart(rel); } catch (IllegalArgumentException ex) { continue; } String type = rel.getRelationshipType(); if (RELATION_OLE_OBJECT.equals(type) && TYPE_OLE_OBJECT.equals(target.getContentType())) { handleEmbeddedOLE(target, handler, sourceDesc + rel.getId()); } else if (RELATION_AUDIO.equals(type) || RELATION_IMAGE.equals(type) || RELATION_PACKAGE.equals(type) || RELATION_OLE_OBJECT.equals(type)) { handleEmbeddedFile(target, handler, sourceDesc + rel.getId()); } } } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
/** * This should handle the comments, master, notes, etc * * @param contentType/*from w w w .jav a2 s. c om*/ * @param xhtmlClassLabel * @param parentPart * @param contentHandler */ private void handleBasicRelatedParts(String contentType, String xhtmlClassLabel, PackagePart parentPart, ContentHandler contentHandler) throws SAXException { PackageRelationshipCollection relatedPartPRC = null; try { relatedPartPRC = parentPart.getRelationshipsByType(contentType); } catch (InvalidFormatException e) { //swallow } if (relatedPartPRC != null && relatedPartPRC.size() > 0) { AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", xhtmlClassLabel); contentHandler.startElement("", "div", "div", attributes); for (int i = 0; i < relatedPartPRC.size(); i++) { PackageRelationship relatedPartPackageRelationship = relatedPartPRC.getRelationship(i); try { PackagePart relatedPartPart = parentPart.getRelatedPart(relatedPartPackageRelationship); try (InputStream stream = relatedPartPart.getInputStream()) { context.getSAXParser().parse(stream, new OfflineContentHandler(new EmbeddedContentHandler(contentHandler))); } catch (IOException | TikaException e) { //do something with this } } catch (InvalidFormatException e) { } } contentHandler.endElement("", "div", "div"); } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private void handleDocumentPart(PackagePart documentPart, XHTMLContentHandler xhtml) throws IOException, SAXException { //load the numbering/list manager and styles from the main document part XWPFNumbering numbering = loadNumbering(documentPart); XWPFListManager listManager = new XWPFListManager(numbering); XWPFStylesShim styles = loadStyles(documentPart); //headers/*from www .j a v a2 s . c om*/ try { PackageRelationshipCollection headersPRC = documentPart .getRelationshipsByType(XWPFRelation.HEADER.getRelation()); if (headersPRC != null) { for (int i = 0; i < headersPRC.size(); i++) { PackagePart header = documentPart.getRelatedPart(headersPRC.getRelationship(i)); handlePart(header, styles, listManager, xhtml); } } } catch (InvalidFormatException e) { //swallow } //main document handlePart(documentPart, styles, listManager, xhtml); //for now, just dump other components at end for (XWPFRelation rel : new XWPFRelation[] { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE }) { try { PackageRelationshipCollection prc = documentPart.getRelationshipsByType(rel.getRelation()); if (prc != null) { for (int i = 0; i < prc.size(); i++) { PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i)); handlePart(packagePart, styles, listManager, xhtml); } } } catch (InvalidFormatException e) { //swallow } } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private XWPFStylesShim loadStyles(PackagePart packagePart) { try {//from w ww .j a v a 2s . c om PackageRelationshipCollection stylesParts = packagePart .getRelationshipsByType(XWPFRelation.STYLES.getRelation()); if (stylesParts.size() > 0) { PackageRelationship stylesRelationShip = stylesParts.getRelationship(0); if (stylesRelationShip == null) { return null; } PackagePart stylesPart = packagePart.getRelatedPart(stylesRelationShip); if (stylesPart == null) { return null; } return new XWPFStylesShim(stylesPart, context); } } catch (OpenXML4JException e) { //swallow } return null; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private XWPFNumbering loadNumbering(PackagePart packagePart) { try {/*from w w w.j a va 2 s .c om*/ PackageRelationshipCollection numberingParts = packagePart .getRelationshipsByType(XWPFRelation.NUMBERING.getRelation()); if (numberingParts.size() > 0) { PackageRelationship numberingRelationShip = numberingParts.getRelationship(0); if (numberingRelationShip == null) { return null; } PackagePart numberingPart = packagePart.getRelatedPart(numberingRelationShip); if (numberingPart == null) { return null; } return new XWPFNumberingShim(numberingPart); } } catch (IOException | OpenXML4JException e) { //swallow } return null; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
/**
 * Appends to {@code relatedParts} every part that {@code documentPart}
 * references through one of the {@code MAIN_PART_RELATIONS} relationship types.
 * An {@code InvalidFormatException} ends collection for the current relation
 * type only; parts added before it remain in the list.
 *
 * @param documentPart part whose relationships are looked up
 * @param relatedParts list that receives the related parts
 */
private void addRelatedParts(PackagePart documentPart, List<PackagePart> relatedParts) {
    for (String relation : MAIN_PART_RELATIONS) {
        try {
            PackageRelationshipCollection prc = documentPart.getRelationshipsByType(relation);
            if (prc == null) {
                continue;
            }
            for (int idx = 0; idx < prc.size(); idx++) {
                relatedParts.add(documentPart.getRelatedPart(prc.getRelationship(idx)));
            }
        } catch (InvalidFormatException e) {
            // ignored: move on to the next relation type
        }
    }
}
From source file:org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor.java
License:Apache License
private void handleDocumentPart(PackagePart documentPart, StringBuilder sb) throws IOException, SAXException { //load the numbering/list manager and styles from the main document part XWPFNumbering numbering = loadNumbering(documentPart); XWPFListManager xwpfListManager = new XWPFListManager(numbering); //TODO: XWPFStyles styles = loadStyles(documentPart); //headers//from w w w . j av a2 s. c o m try { PackageRelationshipCollection headersPRC = documentPart .getRelationshipsByType(XWPFRelation.HEADER.getRelation()); if (headersPRC != null) { for (int i = 0; i < headersPRC.size(); i++) { PackagePart header = documentPart.getRelatedPart(headersPRC.getRelationship(i)); handlePart(header, xwpfListManager, sb); } } } catch (InvalidFormatException e) { //swallow } //main document handlePart(documentPart, xwpfListManager, sb); //for now, just dump other components at end for (XWPFRelation rel : new XWPFRelation[] { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE }) { try { PackageRelationshipCollection prc = documentPart.getRelationshipsByType(rel.getRelation()); if (prc != null) { for (int i = 0; i < prc.size(); i++) { PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i)); handlePart(packagePart, xwpfListManager, sb); } } } catch (InvalidFormatException e) { //swallow } } }