List of usage examples for org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText
public String getText(final boolean slideText, final boolean notesText)
From source file:com.jaeksoft.searchlib.parser.PptxParser.java
License:Open Source License
@Override protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException { // TODO Optimise if it is already a file File tempFile = File.createTempFile("oss", ".pptx"); FileOutputStream fos = null;// w w w . jav a 2 s .c o m try { fos = new FileOutputStream(tempFile); IOUtils.copy(streamLimiter.getNewInputStream(), fos); fos.close(); } catch (IOException e) { IOUtils.close(fos); throw e; } XSLFPowerPointExtractor poiExtractor = null; try { XSLFSlideShow pptSlideShow = new XSLFSlideShow(tempFile.getAbsolutePath()); poiExtractor = new XSLFPowerPointExtractor(pptSlideShow); ParserResultItem result = getNewParserResultItem(); CoreProperties info = poiExtractor.getCoreProperties(); if (info != null) { result.addField(ParserFieldEnum.title, info.getTitle()); result.addField(ParserFieldEnum.creator, info.getCreator()); result.addField(ParserFieldEnum.subject, info.getSubject()); result.addField(ParserFieldEnum.description, info.getDescription()); result.addField(ParserFieldEnum.keywords, info.getKeywords()); } String content = poiExtractor.getText(true, true); result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(content, " ")); result.langDetection(10000, ParserFieldEnum.content); } catch (OpenXML4JException e) { throw new IOException(e); } catch (XmlException e) { throw new IOException(e); } finally { IOUtils.close(poiExtractor); } }
From source file:dk.defxws.fedoragsearch.server.TransformerToText.java
License:Open Source License
/**
 * Extracts the slide and notes text of a .pptx document and returns it,
 * trimmed, as a locked {@code Stream} encoded with
 * {@code Constants.XML_CHARACTER_ENCODING}.
 * <p>
 * When the configuration flag "ignore text extraction errors" parses to
 * {@code true}, any failure is logged and an error stream is returned
 * instead of throwing.
 *
 * @param doc input stream holding the .pptx content
 * @return the stream with the extracted text, or the error stream when
 *         extraction failed and errors are configured to be ignored
 * @throws GenericSearchException if extraction fails and errors are not
 *         configured to be ignored
 */
private static Stream getTextFromPPTX(InputStream doc) throws GenericSearchException {
    long time = System.currentTimeMillis();
    boolean errorFlag = Boolean.parseBoolean(Config.getCurrentConfig().getIgnoreTextExtractionErrors());
    XSLFPowerPointExtractor powerPointExtractor = null;
    try {
        powerPointExtractor = new XSLFPowerPointExtractor(OPCPackage.open(doc));
        String text = powerPointExtractor.getText(true, true).trim();
        Stream stream = new Stream();
        stream.write(text.getBytes(Constants.XML_CHARACTER_ENCODING));
        stream.lock();
        if (logger.isDebugEnabled()) {
            logger.debug("extracting text from pptx needed " + (System.currentTimeMillis() - time));
        }
        return stream;
    } catch (Exception e) {
        if (errorFlag) {
            logger.warn("", e);
            return createErrorStream(pptxTextExtractionErrorString);
        } else {
            throw new GenericSearchException("cannot parse pptx-file", e);
        }
    } finally {
        // Release the extractor instead of merely dropping the reference.
        // NOTE(review): relies on POITextExtractor.close() - confirm the POI
        // version on the classpath provides it.
        if (powerPointExtractor != null) {
            try {
                powerPointExtractor.close();
            } catch (Exception e) {
                // Best effort: the text has already been extracted (or the
                // failure already handled above), so only report it.
                logger.warn("cannot close pptx extractor", e);
            }
        }
    }
}
From source file:org.exoplatform.services.document.impl.MSXPPTDocumentReader.java
License:Open Source License
/** * Returns only a text from .pptx file content. * /*www . jav a 2s .c om*/ * @param is an input stream with .pptx file content. * @return The string only with text from file content. */ public String getContentAsText(final InputStream is) throws IOException, DocumentReadException { if (is == null) { throw new IllegalArgumentException("InputStream is null."); } try { if (is.available() == 0) { return ""; } final XSLFPowerPointExtractor ppe; try { ppe = SecurityHelper .doPrivilegedExceptionAction(new PrivilegedExceptionAction<XSLFPowerPointExtractor>() { public XSLFPowerPointExtractor run() throws Exception { return new XSLFPowerPointExtractor(OPCPackage.open(is)); } }); } catch (PrivilegedActionException pae) { Throwable cause = pae.getCause(); if (cause instanceof IOException) { throw new DocumentReadException("Can't open presentation.", cause); } else if (cause instanceof OpenXML4JRuntimeException) { throw new DocumentReadException("Can't open presentation.", cause); } else if (cause instanceof OpenXML4JException) { throw new DocumentReadException("Can't open presentation.", cause); } else if (cause instanceof XmlException) { throw new DocumentReadException("Can't open presentation.", cause); } else if (cause instanceof RuntimeException) { throw (RuntimeException) cause; } else { throw new RuntimeException(cause); } } return SecurityHelper.doPrivilegedAction(new PrivilegedAction<String>() { public String run() { return ppe.getText(true, true); } }); } finally { if (is != null) { try { is.close(); } catch (IOException e) { if (LOG.isTraceEnabled()) { LOG.trace("An exception occurred: " + e.getMessage()); } } } } }