List of usage examples for org.apache.poi.openxml4j.opc PackagePart getRelationshipsByType
public PackageRelationshipCollection getRelationshipsByType(String relationshipType) throws InvalidFormatException
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * In PowerPoint files, slides have things embedded in them, and slide * drawings which have the images/* w w w . j a v a 2 s. co m*/ */ @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<PackagePart>(); XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFSlideShow document = null; try { document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } for (CTSlideIdListEntry ctSlide : document.getSlideReferences().getSldIdList()) { // Add the slide PackagePart slidePart; try { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); } catch (XmlException xe) { throw new TikaException("Broken OOXML file", xe); } parts.add(slidePart); // If it has drawings, return those too try { for (PackageRelationship rel : slidePart .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } return parts; }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/** * In Excel files, sheets have things embedded in them, and sheet drawings * which have the images/*w w w. j av a 2s . c om*/ */ @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<PackagePart>(); for (PackagePart part : sheetParts) { // Add the sheet parts.add(part); // If it has drawings, return those too try { for (PackageRelationship rel : part.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } for (PackageRelationship rel : part .getRelationshipsByType(XSSFRelation.VML_DRAWINGS.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } return parts; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
/** * This should handle the comments, master, notes, etc * * @param contentType// ww w.j a va 2 s . co m * @param xhtmlClassLabel * @param parentPart * @param contentHandler */ private void handleBasicRelatedParts(String contentType, String xhtmlClassLabel, PackagePart parentPart, ContentHandler contentHandler) throws SAXException { PackageRelationshipCollection relatedPartPRC = null; try { relatedPartPRC = parentPart.getRelationshipsByType(contentType); } catch (InvalidFormatException e) { //swallow } if (relatedPartPRC != null && relatedPartPRC.size() > 0) { AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", xhtmlClassLabel); contentHandler.startElement("", "div", "div", attributes); for (int i = 0; i < relatedPartPRC.size(); i++) { PackageRelationship relatedPartPackageRelationship = relatedPartPRC.getRelationship(i); try { PackagePart relatedPartPart = parentPart.getRelatedPart(relatedPartPackageRelationship); try (InputStream stream = relatedPartPart.getInputStream()) { context.getSAXParser().parse(stream, new OfflineContentHandler(new EmbeddedContentHandler(contentHandler))); } catch (IOException | TikaException e) { //do something with this } } catch (InvalidFormatException e) { } } contentHandler.endElement("", "div", "div"); } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
private void addSlideParts(PackagePart slidePart, List<PackagePart> parts) { for (String relation : new String[] { XSLFRelation.VML_DRAWING.getRelation(), XSLFRelation.SLIDE_LAYOUT.getRelation(), XSLFRelation.NOTES_MASTER.getRelation(), XSLFRelation.NOTES.getRelation() }) { try {//from ww w .j a v a 2 s.com for (PackageRelationship packageRelationship : slidePart.getRelationshipsByType(relation)) { if (packageRelationship.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper .createPartName(packageRelationship.getTargetURI()); parts.add(packageRelationship.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { } } //and slide of course parts.add(slidePart); }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private void handleDocumentPart(PackagePart documentPart, XHTMLContentHandler xhtml) throws IOException, SAXException { //load the numbering/list manager and styles from the main document part XWPFNumbering numbering = loadNumbering(documentPart); XWPFListManager listManager = new XWPFListManager(numbering); XWPFStylesShim styles = loadStyles(documentPart); //headers/*from w w w . ja v a2 s . c o m*/ try { PackageRelationshipCollection headersPRC = documentPart .getRelationshipsByType(XWPFRelation.HEADER.getRelation()); if (headersPRC != null) { for (int i = 0; i < headersPRC.size(); i++) { PackagePart header = documentPart.getRelatedPart(headersPRC.getRelationship(i)); handlePart(header, styles, listManager, xhtml); } } } catch (InvalidFormatException e) { //swallow } //main document handlePart(documentPart, styles, listManager, xhtml); //for now, just dump other components at end for (XWPFRelation rel : new XWPFRelation[] { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE }) { try { PackageRelationshipCollection prc = documentPart.getRelationshipsByType(rel.getRelation()); if (prc != null) { for (int i = 0; i < prc.size(); i++) { PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i)); handlePart(packagePart, styles, listManager, xhtml); } } } catch (InvalidFormatException e) { //swallow } } }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private XWPFStylesShim loadStyles(PackagePart packagePart) { try {// w w w. j ava 2s .co m PackageRelationshipCollection stylesParts = packagePart .getRelationshipsByType(XWPFRelation.STYLES.getRelation()); if (stylesParts.size() > 0) { PackageRelationship stylesRelationShip = stylesParts.getRelationship(0); if (stylesRelationShip == null) { return null; } PackagePart stylesPart = packagePart.getRelatedPart(stylesRelationShip); if (stylesPart == null) { return null; } return new XWPFStylesShim(stylesPart, context); } } catch (OpenXML4JException e) { //swallow } return null; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private XWPFNumbering loadNumbering(PackagePart packagePart) { try {/* w w w .j a va 2 s. co m*/ PackageRelationshipCollection numberingParts = packagePart .getRelationshipsByType(XWPFRelation.NUMBERING.getRelation()); if (numberingParts.size() > 0) { PackageRelationship numberingRelationShip = numberingParts.getRelationship(0); if (numberingRelationShip == null) { return null; } PackagePart numberingPart = packagePart.getRelatedPart(numberingRelationShip); if (numberingPart == null) { return null; } return new XWPFNumberingShim(numberingPart); } } catch (IOException | OpenXML4JException e) { //swallow } return null; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXWPFWordExtractorDecorator.java
License:Apache License
private void addRelatedParts(PackagePart documentPart, List<PackagePart> relatedParts) { for (String relation : MAIN_PART_RELATIONS) { PackageRelationshipCollection prc = null; try {//from w w w. ja va2 s . co m prc = documentPart.getRelationshipsByType(relation); if (prc != null) { for (int i = 0; i < prc.size(); i++) { PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i)); relatedParts.add(packagePart); } } } catch (InvalidFormatException e) { } } }
From source file:org.apache.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * In PowerPoint files, slides have things embedded in them, * and slide drawings which have the images *//* www . jav a 2s.com*/ @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<>(); XSLFSlideShow document = null; try { document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } CTSlideIdList ctSlideIdList = document.getSlideReferences(); if (ctSlideIdList != null) { for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) { CTSlideIdListEntry ctSlide = ctSlideIdList.getSldIdArray(i); // Add the slide PackagePart slidePart; try { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); } catch (XmlException xe) { throw new TikaException("Broken OOXML file", xe); } parts.add(slidePart); // If it has drawings, return those too try { for (PackageRelationship rel : slidePart .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } } return parts; }
From source file:org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor.java
License:Apache License
private void handleDocumentPart(PackagePart documentPart, StringBuilder sb) throws IOException, SAXException { //load the numbering/list manager and styles from the main document part XWPFNumbering numbering = loadNumbering(documentPart); XWPFListManager xwpfListManager = new XWPFListManager(numbering); //TODO: XWPFStyles styles = loadStyles(documentPart); //headers//w w w .j a va 2s . co m try { PackageRelationshipCollection headersPRC = documentPart .getRelationshipsByType(XWPFRelation.HEADER.getRelation()); if (headersPRC != null) { for (int i = 0; i < headersPRC.size(); i++) { PackagePart header = documentPart.getRelatedPart(headersPRC.getRelationship(i)); handlePart(header, xwpfListManager, sb); } } } catch (InvalidFormatException e) { //swallow } //main document handlePart(documentPart, xwpfListManager, sb); //for now, just dump other components at end for (XWPFRelation rel : new XWPFRelation[] { XWPFRelation.FOOTNOTE, XWPFRelation.COMMENT, XWPFRelation.FOOTER, XWPFRelation.ENDNOTE }) { try { PackageRelationshipCollection prc = documentPart.getRelationshipsByType(rel.getRelation()); if (prc != null) { for (int i = 0; i < prc.size(); i++) { PackagePart packagePart = documentPart.getRelatedPart(prc.getRelationship(i)); handlePart(packagePart, xwpfListManager, sb); } } } catch (InvalidFormatException e) { //swallow } } }