List of usage examples for org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getCoreProperties()
public CoreProperties getCoreProperties()
From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsPowerPoint.java
License:Open Source License
/** * ?ppt // ww w .j a v a 2 s . c o m * @param path * @return */ public String readPowerPoint2007(InputStream in) { String content = null; try { XMLSlideShow xmlslideshow = new XMLSlideShow(in); org.apache.poi.xslf.extractor.XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor( xmlslideshow); this.cp = extractor.getCoreProperties(); content = extractor.getText(); // SlideShow ss = new SlideShow(new HSLFSlideShow(in));// is // // InputStreamSlideShow // Slide[] slides = ss.getSlides();// ?? // for (int i = 0; i < slides.length; i++) { // TextRun[] t = slides[i].getTextRuns();// ??TextRun // for (int j = 0; j < t.length; j++) { // content.append(t[j].getText());// content // } // } } catch (Exception ex) { System.out.println(ex.toString()); } return content; }
From source file:com.jaeksoft.searchlib.parser.PptxParser.java
License:Open Source License
@Override protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException { // TODO Optimise if it is already a file File tempFile = File.createTempFile("oss", ".pptx"); FileOutputStream fos = null;//from www. j ava2 s.c o m try { fos = new FileOutputStream(tempFile); IOUtils.copy(streamLimiter.getNewInputStream(), fos); fos.close(); } catch (IOException e) { IOUtils.close(fos); throw e; } XSLFPowerPointExtractor poiExtractor = null; try { XSLFSlideShow pptSlideShow = new XSLFSlideShow(tempFile.getAbsolutePath()); poiExtractor = new XSLFPowerPointExtractor(pptSlideShow); ParserResultItem result = getNewParserResultItem(); CoreProperties info = poiExtractor.getCoreProperties(); if (info != null) { result.addField(ParserFieldEnum.title, info.getTitle()); result.addField(ParserFieldEnum.creator, info.getCreator()); result.addField(ParserFieldEnum.subject, info.getSubject()); result.addField(ParserFieldEnum.description, info.getDescription()); result.addField(ParserFieldEnum.keywords, info.getKeywords()); } String content = poiExtractor.getText(true, true); result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(content, " ")); result.langDetection(10000, ParserFieldEnum.content); } catch (OpenXML4JException e) { throw new IOException(e); } catch (XmlException e) { throw new IOException(e); } finally { IOUtils.close(poiExtractor); } }
From source file:com.opensearchserver.extractor.parser.Pptx.java
License:Apache License
@Override protected void parseContent(File file, String extension, String mimeType) throws Exception { XSLFSlideShow pptSlideShow = new XSLFSlideShow(file.getAbsolutePath()); XMLSlideShow slideshow = new XMLSlideShow(pptSlideShow.getPackage()); // Extract metadata XSLFPowerPointExtractor poiExtractor = null; try {//from w ww.java 2 s .c o m poiExtractor = new XSLFPowerPointExtractor(slideshow); CoreProperties info = poiExtractor.getCoreProperties(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(CREATOR, info.getCreator()); metas.add(SUBJECT, info.getSubject()); metas.add(DESCRIPTION, info.getDescription()); metas.add(KEYWORDS, info.getKeywords()); metas.add(CREATION_DATE, info.getCreated()); metas.add(MODIFICATION_DATE, info.getModified()); } } finally { poiExtractor.close(); } extractSides(slideshow); }
From source file:com.opensearchserver.textextractor.parser.Pptx.java
License:Open Source License
@Override protected void parseContent(File file) throws Exception { XSLFSlideShow pptSlideShow = new XSLFSlideShow(file.getAbsolutePath()); XMLSlideShow slideshow = new XMLSlideShow(pptSlideShow.getPackage()); // Extract metadata XSLFPowerPointExtractor poiExtractor = null; try {//from www.j a va 2s . c o m poiExtractor = new XSLFPowerPointExtractor(slideshow); CoreProperties info = poiExtractor.getCoreProperties(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(CREATOR, info.getCreator()); metas.add(SUBJECT, info.getSubject()); metas.add(DESCRIPTION, info.getDescription()); metas.add(KEYWORDS, info.getKeywords()); metas.add(CREATION_DATE, info.getCreated()); metas.add(MODIFICATION_DATE, info.getModified()); } } finally { poiExtractor.close(); } extractSides(slideshow); }