Usage examples for org.apache.poi.hslf.usermodel.HSLFSlide#getShapes()
@Override
public List<HSLFShape> getShapes()
From source file:org.apache.tika.parser.microsoft.HSLFExtractor.java
License:Apache License
/**
 * Reads an HSLF (binary PowerPoint) slide show from {@code root} and writes its
 * content to {@code xhtml}.
 *
 * <p>The output is a {@code <div class="slideShow">} holding one
 * {@code <div class="slide">} per slide (header, master sheet, slide text, table
 * text, footer, comments, embedded resources and the slide's notes inline),
 * followed by a {@code <div class="slide-notes">} that lists every distinct
 * notes sheet once and then invokes {@code handleSlideEmbeddedPictures}.
 *
 * @param root  directory node the slide show is constructed from
 * @param xhtml handler that receives the generated XHTML events
 * @throws IOException   propagated from the slide show or the extraction helpers
 * @throws SAXException  propagated from the XHTML handler or the extraction helpers
 * @throws TikaException propagated from the extraction helpers
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    HSLFSlideShow ss = new HSLFSlideShow(root);
    List<HSLFSlide> _slides = ss.getSlides();

    xhtml.startElement("div", "class", "slideShow");

    /* Iterate over slides and extract text */
    for (HSLFSlide slide : _slides) {
        xhtml.startElement("div", "class", "slide");

        // Slide header, if present
        HeadersFooters hf = slide.getHeadersFooters();
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            xhtml.startElement("p", "class", "slide-header");
            xhtml.characters(hf.getHeaderText());
            xhtml.endElement("p");
        }

        // Slide master, if present
        extractMaster(xhtml, slide.getMasterSheet());

        // Slide text
        xhtml.startElement("div", "class", "slide-content");
        textRunsToText(xhtml, slide.getTextParagraphs());
        xhtml.endElement("div");

        // Table text
        extractSlideTables(slide, xhtml);

        // Slide footer, if present
        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            xhtml.startElement("p", "class", "slide-footer");
            xhtml.characters(hf.getFooterText());
            xhtml.endElement("p");
        }

        // Comments, if present
        extractSlideComments(slide, xhtml);

        // Now any embedded resources
        handleSlideEmbeddedResources(slide, xhtml);

        // Find the Notes for this slide and extract inline
        HSLFNotes notes = slide.getNotes();
        if (notes != null) {
            xhtml.startElement("div", "class", "slide-notes");
            textRunsToText(xhtml, notes.getTextParagraphs());
            xhtml.endElement("div");
        }

        // Slide complete
        xhtml.endElement("div");
    }

    // All slides done
    xhtml.endElement("div");

    extractNotesSection(ss, _slides, xhtml);
}

/**
 * Writes the text of every table shape on the slide.
 *
 * <p>{@code getShapes()} can throw a {@link NullPointerException} on some
 * documents; {@code handleSlideEmbeddedResources} already tolerates that for the
 * very same call. Tolerating it here as well means such a slide only loses its
 * table text instead of aborting the whole parse before the later guard is
 * ever reached.
 */
private void extractSlideTables(HSLFSlide slide, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    List<HSLFShape> shapes;
    try {
        shapes = slide.getShapes();
    } catch (NullPointerException e) {
        // Sometimes HSLF hits problems; skip table extraction for this slide.
        return;
    }
    if (shapes == null) {
        return;
    }
    for (HSLFShape shape : shapes) {
        if (shape instanceof HSLFTable) {
            extractTableText(xhtml, (HSLFTable) shape);
        }
    }
}

/**
 * Writes one {@code <p class="slide-comment">} per comment on the slide. The
 * author and/or initials, when present, are rendered in bold as
 * {@code Author (XY)} and separated from the comment text by {@code " - "}.
 */
private void extractSlideComments(HSLFSlide slide, XHTMLContentHandler xhtml)
        throws SAXException {
    // One builder is reused for all comments of the slide and reset per comment.
    StringBuilder authorStringBuilder = new StringBuilder();
    for (Comment comment : slide.getComments()) {
        authorStringBuilder.setLength(0);
        xhtml.startElement("p", "class", "slide-comment");

        if (comment.getAuthor() != null) {
            authorStringBuilder.append(comment.getAuthor());
        }
        if (comment.getAuthorInitials() != null) {
            if (authorStringBuilder.length() > 0) {
                authorStringBuilder.append(" ");
            }
            authorStringBuilder.append("(").append(comment.getAuthorInitials()).append(")");
        }
        if (authorStringBuilder.length() > 0) {
            // The separator is only needed when comment text follows the author.
            if (comment.getText() != null) {
                authorStringBuilder.append(" - ");
            }
            xhtml.startElement("b");
            xhtml.characters(authorStringBuilder.toString());
            xhtml.endElement("b");
        }
        if (comment.getText() != null) {
            xhtml.characters(comment.getText());
        }

        xhtml.endElement("p");
    }
}

/**
 * Writes the trailing {@code <div class="slide-notes">}: each distinct notes
 * sheet (keyed by its sheet number) is emitted once, wrapped in the slide
 * show's notes header/footer when those are visible and non-null. Finally
 * {@code handleSlideEmbeddedPictures} is invoked inside the same div.
 */
private void extractNotesSection(HSLFSlideShow ss, List<HSLFSlide> slides,
        XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    xhtml.startElement("div", "class", "slide-notes");

    HashSet<Integer> seenNotes = new HashSet<>();
    HeadersFooters hf = ss.getNotesHeadersFooters();

    for (HSLFSlide slide : slides) {
        HSLFNotes notes = slide.getNotes();
        if (notes == null) {
            continue;
        }
        Integer id = notes._getSheetNumber();
        // add() returns false when the sheet number was already recorded,
        // i.e. several slides share this notes sheet.
        if (!seenNotes.add(id)) {
            continue;
        }

        // Repeat the Notes header, if set
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            xhtml.startElement("p", "class", "slide-note-header");
            xhtml.characters(hf.getHeaderText());
            xhtml.endElement("p");
        }

        // Notes text
        textRunsToText(xhtml, notes.getTextParagraphs());

        // Repeat the notes footer, if set
        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            xhtml.startElement("p", "class", "slide-note-footer");
            xhtml.characters(hf.getFooterText());
            xhtml.endElement("p");
        }
    }

    handleSlideEmbeddedPictures(ss, xhtml);

    xhtml.endElement("div");
}
From source file:org.apache.tika.parser.microsoft.HSLFExtractor.java
License:Apache License
private void handleSlideEmbeddedResources(HSLFSlide slide, XHTMLContentHandler xhtml) throws TikaException, SAXException, IOException { List<HSLFShape> shapes; try {/*from w w w.jav a 2s .c o m*/ shapes = slide.getShapes(); } catch (NullPointerException e) { // Sometimes HSLF hits problems // Please open POI bugs for any you come across! return; } for (HSLFShape shape : shapes) { if (shape instanceof OLEShape) { OLEShape oleShape = (OLEShape) shape; HSLFObjectData data = null; try { data = oleShape.getObjectData(); } catch (NullPointerException e) { /* getObjectData throws NPE some times. */ } if (data != null) { String objID = Integer.toString(oleShape.getObjectID()); // Embedded Object: add a <div // class="embedded" id="X"/> so consumer can see where // in the main text each embedded document // occurred: AttributesImpl attributes = new AttributesImpl(); attributes.addAttribute("", "class", "class", "CDATA", "embedded"); attributes.addAttribute("", "id", "id", "CDATA", objID); xhtml.startElement("div", attributes); xhtml.endElement("div"); try (TikaInputStream stream = TikaInputStream.get(data.getData())) { String mediaType = null; if ("Excel.Chart.8".equals(oleShape.getProgID())) { mediaType = "application/vnd.ms-excel"; } handleEmbeddedResource(stream, objID, objID, mediaType, xhtml, false); } } } } }