List of usage examples for org.apache.poi.xslf.usermodel.XSLFSheet#getShapes()
@Override
public List<XSLFShape> getShapes()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText() *///from w w w. ja v a 2s .c o m protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException { XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFSlide[] slides = slideShow.getSlides(); for (XSLFSlide slide : slides) { String slideDesc; if (slide.getPackagePart() != null && slide.getPackagePart().getPartName() != null) { slideDesc = getJustFileName(slide.getPackagePart().getPartName().toString()); slideDesc += "_"; } else { slideDesc = null; } // slide extractContent(slide.getShapes(), false, xhtml, slideDesc); // slide layout which is the master sheet for this slide XSLFSheet slideLayout = slide.getMasterSheet(); extractContent(slideLayout.getShapes(), true, xhtml, null); // slide master which is the master sheet for all text layouts XSLFSheet slideMaster = slideLayout.getMasterSheet(); extractContent(slideMaster.getShapes(), true, xhtml, null); // notes (if present) XSLFSheet slideNotes = slide.getNotes(); if (slideNotes != null) { extractContent(slideNotes.getShapes(), false, xhtml, slideDesc); // master sheet for this notes XSLFSheet notesMaster = slideNotes.getMasterSheet(); extractContent(notesMaster.getShapes(), true, xhtml, null); } // comments (if present) XSLFComments comments = slide.getComments(); if (comments != null) { for (CTComment comment : comments.getCTCommentsList().getCmList()) { xhtml.element("p", comment.getText()); } } } if (Config.inst().getProp(ConfigBool.ENABLE_IMAGE_OCR)) { TikaImageHelper helper = new TikaImageHelper(metadata); try { List<XSLFPictureData> pictures = slideShow.getAllPictures(); for (XSLFPictureData picture : pictures) { ByteArrayInputStream imageData = new ByteArrayInputStream(picture.getData()); BufferedImage image = ImageIO.read(imageData); helper.addImage(image); helper.addTextToHandler(xhtml); } } catch (Exception e) { e.printStackTrace(); } finally { if (extractor != null) { extractor.close(); } if 
(helper != null) { helper.close(); } } } }
From source file:org.apache.tika.parser.microsoft.ooxml.XSLFPowerPointExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText() *///from w w w. j av a 2s . co m protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException { XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument(); XSLFCommentAuthors commentAuthors = slideShow.getCommentAuthors(); List<XSLFSlide> slides = slideShow.getSlides(); for (XSLFSlide slide : slides) { String slideDesc; if (slide.getPackagePart() != null && slide.getPackagePart().getPartName() != null) { slideDesc = getJustFileName(slide.getPackagePart().getPartName().toString()); slideDesc += "_"; } else { slideDesc = null; } // slide content xhtml.startElement("div", "class", "slide-content"); extractContent(slide.getShapes(), false, xhtml, slideDesc); xhtml.endElement("div"); // slide layout which is the master sheet for this slide xhtml.startElement("div", "class", "slide-master-content"); XSLFSlideLayout slideLayout = slide.getMasterSheet(); extractContent(slideLayout.getShapes(), true, xhtml, null); xhtml.endElement("div"); // slide master which is the master sheet for all text layouts XSLFSheet slideMaster = slideLayout.getMasterSheet(); extractContent(slideMaster.getShapes(), true, xhtml, null); // notes (if present) XSLFNotes slideNotes = slide.getNotes(); if (slideNotes != null) { xhtml.startElement("div", "class", "slide-notes"); extractContent(slideNotes.getShapes(), false, xhtml, slideDesc); // master sheet for this notes XSLFNotesMaster notesMaster = slideNotes.getMasterSheet(); extractContent(notesMaster.getShapes(), true, xhtml, null); xhtml.endElement("div"); } // comments (if present) XSLFComments comments = slide.getComments(); if (comments != null) { StringBuilder authorStringBuilder = new StringBuilder(); for (int i = 0; i < comments.getNumberOfComments(); i++) { authorStringBuilder.setLength(0); CTComment comment = comments.getCommentAt(i); xhtml.startElement("p", "class", "slide-comment"); CTCommentAuthor cta = 
commentAuthors.getAuthorById(comment.getAuthorId()); if (cta != null) { if (cta.getName() != null) { authorStringBuilder.append(cta.getName()); } if (cta.getInitials() != null) { if (authorStringBuilder.length() > 0) { authorStringBuilder.append(" "); } authorStringBuilder.append("(" + cta.getInitials() + ")"); } if (comment.getText() != null && authorStringBuilder.length() > 0) { authorStringBuilder.append(" - "); } if (authorStringBuilder.length() > 0) { xhtml.startElement("b"); xhtml.characters(authorStringBuilder.toString()); xhtml.endElement("b"); } } xhtml.characters(comment.getText()); xhtml.endElement("p"); } } } }