Example usage for org.apache.poi.xslf.extractor.XSLFPowerPointExtractor.getText

List of usage examples for org.apache.poi.xslf.extractor.XSLFPowerPointExtractor.getText

Introduction

On this page you can find example usages of org.apache.poi.xslf.extractor.XSLFPowerPointExtractor.getText(boolean, boolean).

Prototype

public String getText(final boolean slideText, final boolean notesText) 

Source Link

Document

Gets the requested text from the file

Usage

From source file:com.jaeksoft.searchlib.parser.PptxParser.java

License:Open Source License

@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {

    // TODO Optimise if it is already a file
    File tempFile = File.createTempFile("oss", ".pptx");
    FileOutputStream fos = null;// w w  w .  jav  a  2  s .c  o  m
    try {
        fos = new FileOutputStream(tempFile);
        IOUtils.copy(streamLimiter.getNewInputStream(), fos);
        fos.close();
    } catch (IOException e) {
        IOUtils.close(fos);
        throw e;
    }

    XSLFPowerPointExtractor poiExtractor = null;
    try {
        XSLFSlideShow pptSlideShow = new XSLFSlideShow(tempFile.getAbsolutePath());
        poiExtractor = new XSLFPowerPointExtractor(pptSlideShow);

        ParserResultItem result = getNewParserResultItem();
        CoreProperties info = poiExtractor.getCoreProperties();
        if (info != null) {
            result.addField(ParserFieldEnum.title, info.getTitle());
            result.addField(ParserFieldEnum.creator, info.getCreator());
            result.addField(ParserFieldEnum.subject, info.getSubject());
            result.addField(ParserFieldEnum.description, info.getDescription());
            result.addField(ParserFieldEnum.keywords, info.getKeywords());
        }

        String content = poiExtractor.getText(true, true);
        result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(content, " "));

        result.langDetection(10000, ParserFieldEnum.content);

    } catch (OpenXML4JException e) {
        throw new IOException(e);
    } catch (XmlException e) {
        throw new IOException(e);
    } finally {
        IOUtils.close(poiExtractor);
    }

}

From source file:dk.defxws.fedoragsearch.server.TransformerToText.java

License:Open Source License

private static Stream getTextFromPPTX(InputStream doc) throws GenericSearchException {
    long time = System.currentTimeMillis();
    boolean errorFlag = Boolean.parseBoolean(Config.getCurrentConfig().getIgnoreTextExtractionErrors());
    XSLFPowerPointExtractor powerPointExtractor = null;
    try {/*  w w w .ja v  a  2s.co  m*/
        powerPointExtractor = new XSLFPowerPointExtractor(OPCPackage.open(doc));
        StringBuffer buffer = new StringBuffer(powerPointExtractor.getText(true, true).trim());
        Stream stream = new Stream();
        stream.write(buffer.toString().getBytes(Constants.XML_CHARACTER_ENCODING));
        stream.lock();
        if (logger.isDebugEnabled()) {
            logger.debug("extracting text from pptx needed " + (System.currentTimeMillis() - time));
        }
        return stream;
    } catch (Exception e) {
        if (errorFlag) {
            logger.warn("", e);
            return createErrorStream(pptxTextExtractionErrorString);
        } else {
            throw new GenericSearchException("cannot parse pptx-file", e);
        }
    } finally {
        powerPointExtractor = null;
    }
}

From source file:org.exoplatform.services.document.impl.MSXPPTDocumentReader.java

License:Open Source License

/**
 * Returns only a text from .pptx file content.
 * /*www  . jav a  2s .c om*/
 * @param is an input stream with .pptx file content.
 * @return The string only with text from file content.
 */
public String getContentAsText(final InputStream is) throws IOException, DocumentReadException {
    if (is == null) {
        throw new IllegalArgumentException("InputStream is null.");
    }
    try {
        if (is.available() == 0) {
            return "";
        }

        final XSLFPowerPointExtractor ppe;
        try {
            ppe = SecurityHelper
                    .doPrivilegedExceptionAction(new PrivilegedExceptionAction<XSLFPowerPointExtractor>() {
                        public XSLFPowerPointExtractor run() throws Exception {
                            return new XSLFPowerPointExtractor(OPCPackage.open(is));
                        }
                    });
        } catch (PrivilegedActionException pae) {
            Throwable cause = pae.getCause();
            if (cause instanceof IOException) {
                throw new DocumentReadException("Can't open presentation.", cause);
            } else if (cause instanceof OpenXML4JRuntimeException) {
                throw new DocumentReadException("Can't open presentation.", cause);
            } else if (cause instanceof OpenXML4JException) {
                throw new DocumentReadException("Can't open presentation.", cause);
            } else if (cause instanceof XmlException) {
                throw new DocumentReadException("Can't open presentation.", cause);
            } else if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new RuntimeException(cause);
            }
        }
        return SecurityHelper.doPrivilegedAction(new PrivilegedAction<String>() {
            public String run() {
                return ppe.getText(true, true);
            }
        });
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("An exception occurred: " + e.getMessage());
                }
            }
        }
    }
}