List of usage examples for org.apache.poi.hslf.model HeadersFooters getFooterText
public String getFooterText()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.HSLFExtractor.java
License:Apache License
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException { HSLFSlideShow ss = new HSLFSlideShow(root); SlideShow _show = new SlideShow(ss); Slide[] _slides = _show.getSlides(); xhtml.startElement("div", "class", "slideShow"); /* Iterate over slides and extract text */ for (Slide slide : _slides) { xhtml.startElement("div", "class", "slide"); // Slide header, if present HeadersFooters hf = slide.getHeadersFooters(); if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) { xhtml.startElement("p", "class", "slide-header"); xhtml.characters(hf.getHeaderText()); xhtml.endElement("p"); }/*from www. ja va 2 s . co m*/ // Slide master, if present extractMaster(xhtml, slide.getMasterSheet()); // Slide text { xhtml.startElement("p", "class", "slide-content"); textRunsToText(xhtml, slide.getTextRuns()); xhtml.endElement("p"); } // Table text for (Shape shape : slide.getShapes()) { if (shape instanceof Table) { extractTableText(xhtml, (Table) shape); } } // Slide footer, if present if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) { xhtml.startElement("p", "class", "slide-footer"); xhtml.characters(hf.getFooterText()); xhtml.endElement("p"); } // Comments, if present for (Comment comment : slide.getComments()) { xhtml.startElement("p", "class", "slide-comment"); if (comment.getAuthor() != null) { xhtml.startElement("b"); xhtml.characters(comment.getAuthor()); xhtml.endElement("b"); if (comment.getText() != null) { xhtml.characters(" - "); } } if (comment.getText() != null) { xhtml.characters(comment.getText()); } xhtml.endElement("p"); } // Now any embedded resources handleSlideEmbeddedResources(slide, xhtml); // TODO Find the Notes for this slide and extract inline // Slide complete xhtml.endElement("div"); } // All slides done xhtml.endElement("div"); /* notes */ xhtml.startElement("div", "class", "slideNotes"); HashSet<Integer> seenNotes = new HashSet<Integer>(); HeadersFooters hf 
= _show.getNotesHeadersFooters(); for (Slide slide : _slides) { Notes notes = slide.getNotesSheet(); if (notes == null) { continue; } Integer id = Integer.valueOf(notes._getSheetNumber()); if (seenNotes.contains(id)) { continue; } seenNotes.add(id); // Repeat the Notes header, if set if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) { xhtml.startElement("p", "class", "slide-note-header"); xhtml.characters(hf.getHeaderText()); xhtml.endElement("p"); } // Notes text textRunsToText(xhtml, notes.getTextRuns()); // Repeat the notes footer, if set if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) { xhtml.startElement("p", "class", "slide-note-footer"); xhtml.characters(hf.getFooterText()); xhtml.endElement("p"); } } handleSlideEmbeddedPictures(_show, xhtml); xhtml.endElement("div"); extractImageText(xhtml, ss); }
From source file:org.apache.tika.parser.microsoft.HSLFExtractor.java
License:Apache License
/**
 * Extracts the text of a PowerPoint document into XHTML.
 *
 * <p>Writes a {@code slideShow} div with one {@code slide} div per slide (header,
 * master text, slide text, table text, footer, comments, embedded resources and
 * the slide's notes inline), followed by a second {@code slide-notes} div in which
 * each distinct notes sheet appears once, wrapped by the notes header/footer when
 * those are visible, and finally the embedded pictures.
 *
 * @param root  directory node the slide show is read from
 * @param xhtml handler that receives the generated elements and text
 * @throws IOException   declared; raised by the underlying calls
 * @throws SAXException  declared; raised by the underlying calls
 * @throws TikaException declared; raised by the underlying calls
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    HSLFSlideShow show = new HSLFSlideShow(root);
    List<HSLFSlide> allSlides = show.getSlides();

    xhtml.startElement("div", "class", "slideShow");

    // ---- per-slide content ----
    for (HSLFSlide current : allSlides) {
        xhtml.startElement("div", "class", "slide");

        // Header is written only when it is both visible and has text.
        HeadersFooters slideHf = current.getHeadersFooters();
        boolean showHeader = slideHf != null
                && slideHf.isHeaderVisible()
                && slideHf.getHeaderText() != null;
        if (showHeader) {
            xhtml.startElement("p", "class", "slide-header");
            xhtml.characters(slideHf.getHeaderText());
            xhtml.endElement("p");
        }

        // Text inherited from the master sheet.
        extractMaster(xhtml, current.getMasterSheet());

        // The slide's own paragraphs.
        xhtml.startElement("div", "class", "slide-content");
        textRunsToText(xhtml, current.getTextParagraphs());
        xhtml.endElement("div");

        // Tables are the only shapes handled here.
        for (HSLFShape candidate : current.getShapes()) {
            if (!(candidate instanceof HSLFTable)) {
                continue;
            }
            extractTableText(xhtml, (HSLFTable) candidate);
        }

        // Footer follows the same visibility rule as the header.
        boolean showFooter = slideHf != null
                && slideHf.isFooterVisible()
                && slideHf.getFooterText() != null;
        if (showFooter) {
            xhtml.startElement("p", "class", "slide-footer");
            xhtml.characters(slideHf.getFooterText());
            xhtml.endElement("p");
        }

        // Comments: a bold label "author (initials) - " followed by the text.
        for (Comment remark : current.getComments()) {
            xhtml.startElement("p", "class", "slide-comment");

            StringBuilder label = new StringBuilder();
            String author = remark.getAuthor();
            if (author != null) {
                label.append(author);
            }
            String initials = remark.getAuthorInitials();
            if (initials != null) {
                if (label.length() > 0) {
                    label.append(" ");
                }
                label.append("(").append(initials).append(")");
            }

            String body = remark.getText();
            if (label.length() > 0) {
                // The separator is part of the bold label and is only added
                // when a comment text follows.
                if (body != null) {
                    label.append(" - ");
                }
                xhtml.startElement("b");
                xhtml.characters(label.toString());
                xhtml.endElement("b");
            }
            if (body != null) {
                xhtml.characters(body);
            }

            xhtml.endElement("p");
        }

        handleSlideEmbeddedResources(current, xhtml);

        // Notes belonging to this slide, written inline.
        HSLFNotes inlineNotes = current.getNotes();
        if (inlineNotes != null) {
            xhtml.startElement("div", "class", "slide-notes");
            textRunsToText(xhtml, inlineNotes.getTextParagraphs());
            xhtml.endElement("div");
        }

        xhtml.endElement("div");
    }

    xhtml.endElement("div");

    // ---- notes section ----
    xhtml.startElement("div", "class", "slide-notes");
    HashSet<Integer> emittedSheets = new HashSet<>();
    HeadersFooters notesHf = show.getNotesHeadersFooters();
    for (HSLFSlide current : allSlides) {
        HSLFNotes sheet = current.getNotes();
        if (sheet == null) {
            continue;
        }
        // add() returns false when the sheet number was already recorded,
        // so a notes sheet reached from several slides is written once.
        Integer sheetId = sheet._getSheetNumber();
        if (!emittedSheets.add(sheetId)) {
            continue;
        }

        // The notes header is repeated for every emitted sheet.
        if (notesHf != null && notesHf.isHeaderVisible() && notesHf.getHeaderText() != null) {
            xhtml.startElement("p", "class", "slide-note-header");
            xhtml.characters(notesHf.getHeaderText());
            xhtml.endElement("p");
        }

        textRunsToText(xhtml, sheet.getTextParagraphs());

        // ... and so is the notes footer.
        if (notesHf != null && notesHf.isFooterVisible() && notesHf.getFooterText() != null) {
            xhtml.startElement("p", "class", "slide-note-footer");
            xhtml.characters(notesHf.getFooterText());
            xhtml.endElement("p");
        }
    }

    handleSlideEmbeddedPictures(show, xhtml);
    xhtml.endElement("div");
}
From source file:uk.ac.liverpool.MSOffice.MSPowerpoint.java
License:Open Source License
/**
 * Serialises the text of a slide show as a pretty-printed XML document.
 *
 * <p>The root {@code Slideshow} element carries the document URI when one is
 * available. Each slide becomes a {@code Slide} element with an optional
 * {@code title} attribute, a {@code slideNumber} attribute, an optional
 * {@code Header}, the slide text, an optional {@code Footer}, one
 * {@code Comment} element per comment and an optional {@code PresenterNote}.
 *
 * <p>Fixes relative to the previous version:
 * <ul>
 *   <li>the slide text is added exactly once (it used to be added inside the
 *       {@code try} and again after it, duplicating the text of every slide);</li>
 *   <li>the control-character fallback removes every offending character (the
 *       in-place deletion loop skipped the character following each deletion);</li>
 *   <li>comment attributes are only set when the value is non-null, because
 *       JDOM rejects null attribute values.</li>
 * </ul>
 *
 * @param ppt the slide show to serialise
 * @return the XML document as a string
 */
private String extraxtXMLText(SlideShow ppt) {
    Slide[] slides = ppt.getSlides();
    Element root = new Element("Slideshow");
    if (getDocument() != null && getDocument().getURI() != null) {
        root.setAttribute("URI", getDocument().getURI().toString());
    }

    for (Slide s : slides) {
        Element slide = new Element("Slide");
        if (s.getTitle() != null) {
            slide.setAttribute("title", s.getTitle());
        }
        slide.setAttribute("slideNumber", "" + s.getSlideNumber());

        HeadersFooters hf = s.getHeadersFooters();
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            Element header = new Element("Header");
            header.addContent(hf.getHeaderText());
            slide.addContent(header);
        }

        // Concatenate the text runs, making sure each one ends with a newline.
        StringBuilder ret = new StringBuilder();
        for (TextRun run : s.getTextRuns()) {
            if (run == null) {
                continue;
            }
            String text = run.getText();
            ret.append(text);
            if (!text.endsWith("\n")) {
                ret.append("\n");
            }
        }

        String slideText = ret.toString();
        try {
            slide.addContent(slideText);
        } catch (IllegalDataException x) {
            // The text was rejected by JDOM: retry with every control character
            // other than tab, LF and CR removed. Building a new string avoids
            // the index skipping that in-place deletion suffers from.
            StringBuilder cleaned = new StringBuilder(slideText.length());
            for (int i = 0; i < slideText.length(); i++) {
                char c = slideText.charAt(i);
                if (c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD) {
                    cleaned.append(c);
                }
            }
            slide.addContent(cleaned.toString());
        }

        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            Element footer = new Element("Footer");
            footer.addContent(hf.getFooterText());
            slide.addContent(footer);
        }

        for (Comment c : s.getComments()) {
            Element comment = new Element("Comment");
            // A null attribute value makes JDOM throw, so absent values are omitted.
            if (c.getAuthor() != null) {
                comment.setAttribute("author", c.getAuthor());
            }
            if (c.getAuthorInitials() != null) {
                comment.setAttribute("authorInitials", c.getAuthorInitials());
            }
            comment.addContent(c.getText());
            slide.addContent(comment);
        }

        // NOTE(review): the note text is wrapped with the *slide's* header/footer,
        // whereas drawPage() uses ppt.getNotesHeadersFooters() for the same purpose.
        // Behaviour is kept as-is here; confirm which one is intended.
        StringBuilder sb = new StringBuilder();
        Notes nn = s.getNotesSheet();
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            sb.append(hf.getHeaderText()).append("\n");
        }
        if (nn != null) {
            for (TextRun t : nn.getTextRuns()) {
                sb.append(t.getText());
            }
        }
        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            sb.append(hf.getFooterText()).append("\n");
        }
        if (sb.length() > 0) {
            Element note = new Element("PresenterNote");
            note.addContent(sb.toString());
            slide.addContent(note);
        }

        root.addContent(slide);
    }

    org.jdom.Document d = new org.jdom.Document(root);
    XMLOutputter p = new XMLOutputter(Format.getPrettyFormat());
    return p.outputString(d);
}
From source file:uk.ac.liverpool.MSOffice.MSPowerpoint.java
License:Open Source License
/** * Draws a specific page in the Powerpoint document. Also takes care of the * display of the notes from the slideshow. * /*from w w w.j a v a 2 s . c o m*/ * @param attr * @param doc * @return */ private Leaf drawPage(String attr, Document doc) { // retrieve the slides from the docuemnt attributes SlideShow ppt = (SlideShow) doc.getValue("ppt"); Slide[] slide = ppt.getSlides(); // clean the doc doc.removeAllChildren(); // page number int i = Integer.parseInt(attr) - 1; String title = slide[i].getTitle(); System.out.println("Rendering slide " + slide[i].getSlideNumber() + (title == null ? "" : ": " + title)); // create the slide object and add it to the document LeafSlide l = new LeafSlide("slide", null, doc, slide[i], getZoom()); if (getZoom() != prevZoom) { prevZoom = getZoom(); } HeadersFooters hf = ppt.getNotesHeadersFooters(); // Interpret the notes StringBuilder sb = new StringBuilder(); Notes nn = slide[i].getNotesSheet(); if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) { sb.append(hf.getHeaderText() + "\n"); } if (nn != null) { TextRun[] ts = nn.getTextRuns(); for (TextRun t : ts) { sb.append(t.getText()); } } if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) { sb.append(hf.getFooterText() + "\n"); } // Display a preesnter's notes pop up window if (sb != null && sb.length() > 0) { Layer sc = doc.getLayer(Layer.SCRATCH); sc.clearBehaviors(); Map<String, Object> m = new HashMap<String, Object>(); m.put("text", sb.toString()); ImageInternalDataFrame ls = (ImageInternalDataFrame) Behavior.getInstance("ImageInternalDataFrame", "uk.ac.liverpool.fab4.ImageInternalDataFrame", null, m, sc); ls.setTitle("Presenter's Notes"); ls.setTransparent(true); ls.setBounds(10, l.getHeight() + 10, l.getWidth() - 20, doc.bbox.height - l.getHeight() - 30); } return l; }
From source file:uk.ac.liverpool.thumbnails.PPTService.java
License:Open Source License
/**
 * Loads a PowerPoint file and serialises its text as a pretty-printed XML
 * document.
 *
 * <p>The root {@code Slideshow} element carries the given URI. Each slide
 * becomes a {@code Slide} element with an optional {@code title} attribute, a
 * {@code slideNumber} attribute, an optional {@code Header}, the slide text,
 * an optional {@code Footer}, one {@code Comment} element per comment and an
 * optional {@code PresenterNote}.
 *
 * <p>Fixes relative to the previous version:
 * <ul>
 *   <li>the slide text is added exactly once (it used to be added inside the
 *       {@code try} and again after it, duplicating the text of every slide);</li>
 *   <li>the control-character fallback removes every offending character (the
 *       in-place deletion loop skipped the character following each deletion);</li>
 *   <li>comment attributes are only set when the value is non-null, because
 *       JDOM rejects null attribute values.</li>
 * </ul>
 *
 * @param u URI recorded on the root element and passed to {@code getHPPT}
 * @param f file passed to {@code getHPPT}
 * @return the XML document as a string
 * @throws MalformedURLException declared; raised by the underlying calls
 * @throws IOException           declared; raised by the underlying calls
 */
public String extractXMLText(URI u, File f) throws MalformedURLException, IOException {
    HSLFSlideShow hppt = getHPPT(u, f);
    SlideShow ppt = new SlideShow(hppt);

    Element root = new Element("Slideshow");
    root.setAttribute("URI", u.toString());

    Slide[] slides = ppt.getSlides();
    for (Slide s : slides) {
        Element slide = new Element("Slide");
        if (s.getTitle() != null) {
            slide.setAttribute("title", s.getTitle());
        }
        slide.setAttribute("slideNumber", "" + s.getSlideNumber());

        HeadersFooters hf = s.getHeadersFooters();
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            Element header = new Element("Header");
            header.addContent(hf.getHeaderText());
            slide.addContent(header);
        }

        // Concatenate the text runs, making sure each one ends with a newline.
        StringBuilder ret = new StringBuilder();
        for (TextRun run : s.getTextRuns()) {
            if (run == null) {
                continue;
            }
            String text = run.getText();
            ret.append(text);
            if (!text.endsWith("\n")) {
                ret.append("\n");
            }
        }

        String slideText = ret.toString();
        try {
            slide.addContent(slideText);
        } catch (IllegalDataException x) {
            // The text was rejected by JDOM: retry with every control character
            // other than tab, LF and CR removed. Building a new string avoids
            // the index skipping that in-place deletion suffers from.
            StringBuilder cleaned = new StringBuilder(slideText.length());
            for (int i = 0; i < slideText.length(); i++) {
                char c = slideText.charAt(i);
                if (c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD) {
                    cleaned.append(c);
                }
            }
            slide.addContent(cleaned.toString());
        }

        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            Element footer = new Element("Footer");
            footer.addContent(hf.getFooterText());
            slide.addContent(footer);
        }

        for (Comment c : s.getComments()) {
            Element comment = new Element("Comment");
            // A null attribute value makes JDOM throw, so absent values are omitted.
            if (c.getAuthor() != null) {
                comment.setAttribute("author", c.getAuthor());
            }
            if (c.getAuthorInitials() != null) {
                comment.setAttribute("authorInitials", c.getAuthorInitials());
            }
            comment.addContent(c.getText());
            slide.addContent(comment);
        }

        // NOTE(review): the note text is wrapped with the *slide's* header/footer
        // rather than the slide show's notes header/footer. Behaviour is kept
        // as-is here; confirm which one is intended.
        StringBuilder sb = new StringBuilder();
        Notes nn = s.getNotesSheet();
        if (hf != null && hf.isHeaderVisible() && hf.getHeaderText() != null) {
            sb.append(hf.getHeaderText()).append("\n");
        }
        if (nn != null) {
            for (TextRun t : nn.getTextRuns()) {
                sb.append(t.getText());
            }
        }
        if (hf != null && hf.isFooterVisible() && hf.getFooterText() != null) {
            sb.append(hf.getFooterText()).append("\n");
        }
        if (sb.length() > 0) {
            Element note = new Element("PresenterNote");
            note.addContent(sb.toString());
            slide.addContent(note);
        }

        root.addContent(slide);
    }

    org.jdom.Document d = new org.jdom.Document(root);
    XMLOutputter p = new XMLOutputter(Format.getPrettyFormat());
    return p.outputString(d);
}