List of usage examples for org.apache.poi.openxml4j.opc PackageRelationship getTargetMode
public TargetMode getTargetMode()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.java
License:Apache License
private void handleEmbeddedParts(ContentHandler handler) throws TikaException, IOException, SAXException { try {//from www. j ava 2 s .com for (PackagePart source : getMainDocumentParts()) { for (PackageRelationship rel : source.getRelationships()) { URI sourceURI = rel.getSourceURI(); String sourceDesc; if (sourceURI != null) { sourceDesc = getJustFileName(sourceURI.getPath()); if (sourceDesc.startsWith("slide")) { sourceDesc += "_"; } else { sourceDesc = ""; } } else { sourceDesc = ""; } if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePart target; try { target = source.getRelatedPart(rel); } catch (IllegalArgumentException ex) { continue; } String type = rel.getRelationshipType(); if (RELATION_OLE_OBJECT.equals(type) && TYPE_OLE_OBJECT.equals(target.getContentType())) { handleEmbeddedOLE(target, handler, sourceDesc + rel.getId()); } else if (RELATION_AUDIO.equals(type) || RELATION_IMAGE.equals(type) || RELATION_PACKAGE.equals(type) || RELATION_OLE_OBJECT.equals(type)) { handleEmbeddedFile(target, handler, sourceDesc + rel.getId()); } } } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * In PowerPoint files, slides have things embedded in them, and slide * drawings which have the images/* w w w . j ava 2 s . c om*/ */ @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<PackagePart>(); XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFSlideShow document = null; try { document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } for (CTSlideIdListEntry ctSlide : document.getSlideReferences().getSldIdList()) { // Add the slide PackagePart slidePart; try { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); } catch (XmlException xe) { throw new TikaException("Broken OOXML file", xe); } parts.add(slidePart); // If it has drawings, return those too try { for (PackageRelationship rel : slidePart .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } return parts; }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator.java
License:Apache License
/** * In Excel files, sheets have things embedded in them, and sheet drawings * which have the images//from w w w . j a v a 2 s .c o m */ @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<PackagePart>(); for (PackagePart part : sheetParts) { // Add the sheet parts.add(part); // If it has drawings, return those too try { for (PackageRelationship rel : part.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } for (PackageRelationship rel : part .getRelationshipsByType(XSSFRelation.VML_DRAWINGS.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } return parts; }
From source file:org.apache.tika.parser.microsoft.ooxml.SXSLFPowerPointExtractorDecorator.java
License:Apache License
private void addSlideParts(PackagePart slidePart, List<PackagePart> parts) { for (String relation : new String[] { XSLFRelation.VML_DRAWING.getRelation(), XSLFRelation.SLIDE_LAYOUT.getRelation(), XSLFRelation.NOTES_MASTER.getRelation(), XSLFRelation.NOTES.getRelation() }) { try {// w ww. j av a2 s . c o m for (PackageRelationship packageRelationship : slidePart.getRelationshipsByType(relation)) { if (packageRelationship.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper .createPartName(packageRelationship.getTargetURI()); parts.add(packageRelationship.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { } } //and slide of course parts.add(slidePart); }
From source file:org.apache.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * In PowerPoint files, slides have things embedded in them, * and slide drawings which have the images *///from w ww. j a v a2 s. c o m @Override protected List<PackagePart> getMainDocumentParts() throws TikaException { List<PackagePart> parts = new ArrayList<>(); XSLFSlideShow document = null; try { document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen } CTSlideIdList ctSlideIdList = document.getSlideReferences(); if (ctSlideIdList != null) { for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) { CTSlideIdListEntry ctSlide = ctSlideIdList.getSldIdArray(i); // Add the slide PackagePart slidePart; try { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); } catch (XmlException xe) { throw new TikaException("Broken OOXML file", xe); } parts.add(slidePart); // If it has drawings, return those too try { for (PackageRelationship rel : slidePart .getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) { if (rel.getTargetMode() == TargetMode.INTERNAL) { PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); parts.add(rel.getPackage().getPart(relName)); } } } catch (InvalidFormatException e) { throw new TikaException("Broken OOXML file", e); } } } return parts; }