Example usage for org.apache.poi.hslf.extractor PowerPointExtractor getDocSummaryInformation

List of usage examples for org.apache.poi.hslf.extractor PowerPointExtractor getDocSummaryInformation

Introduction

On this page you can find example usages of the getDocSummaryInformation method of org.apache.poi.hslf.extractor.PowerPointExtractor.

Prototype

public DocumentSummaryInformation getDocSummaryInformation() 

Source Link

Document

Returns the document information metadata for the document

Usage

From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsPowerPoint.java

License:Open Source License

/** 
* ?ppt /*from  ww w.j a  v a  2  s  . co  m*/
  * @param path 
  * @return 
  */
public String readPowerPoint(InputStream in) {
    String content = null;
    try {
        HSLFSlideShow slideShow = new HSLFSlideShow(in);
        org.apache.poi.hslf.extractor.PowerPointExtractor extractor = new PowerPointExtractor(slideShow);
        this.m_documentSummary = extractor.getDocSummaryInformation();
        this.m_summary = extractor.getSummaryInformation();
        content = extractor.getText();
        //                 SlideShow ss = new SlideShow(new HSLFSlideShow(in));// is  
        //                // InputStreamSlideShow  
        //                Slide[] slides = ss.getSlides();// ??  
        //                 for (int i = 0; i < slides.length; i++) {  
        //                    TextRun[] t = slides[i].getTextRuns();// ??TextRun  
        //                     for (int j = 0; j < t.length; j++) {  
        //                         content.append(t[j].getText());// content  
        //                    }  
        //                 }  
    } catch (Exception ex) {
        System.out.println(ex.toString());
    }
    return content;
}

From source file:net.yacy.document.parser.pptParser.java

License:Open Source License

/**
 * Parses a PowerPoint document into a single-element {@link Document} array.
 * <p>
 * The slide text and notes are extracted as the document body. The title is
 * derived from the body: line breaks and tabs become spaces, the result is cut
 * to at most 80 characters and runs of spaces are collapsed to one space.
 * Keywords, subject and author are taken from the summary information and the
 * company from the document summary information; when either property set is
 * missing the corresponding values are simply left {@code null} instead of
 * failing the whole parse.
 *
 * @param location       URL of the document; used for the result and in error messages
 * @param mimeType       mime type passed through to the created {@link Document}
 * @param charset        not used by this parser (the document is created with "UTF-8")
 * @param scraper        not used by this parser
 * @param timezoneOffset not used by this parser
 * @param source         stream supplying the PowerPoint document
 * @return an array containing exactly one parsed document
 * @throws Parser.Failure       if any exception other than an interruption occurs
 * @throws InterruptedException if an {@link InterruptedException} surfaces while parsing
 */
@Override
public Document[] parse(final AnchorURL location, final String mimeType, final String charset,
        final VocabularyScraper scraper, final int timezoneOffset, final InputStream source)
        throws Parser.Failure, InterruptedException {
    try {
        /*
         * create new PowerPointExtractor and extract text and notes
         * of the document
         */
        final PowerPointExtractor pptExtractor = new PowerPointExtractor(new BufferedInputStream(source));
        final String contents = pptExtractor.getText(true, true).trim();

        // build a short title: flatten line breaks/tabs, cut to 80 chars,
        // then collapse every run of spaces into a single space
        String title = contents.replaceAll("[\r\n\t]", " ").trim();
        if (title.length() > 80) {
            title = title.substring(0, 80);
        }
        title = title.replaceAll(" {2,}", " ");

        // the property sets may be absent (POI then returns null); treat that as
        // "no metadata" rather than letting a NullPointerException abort the parse
        final boolean hasSummary = pptExtractor.getSummaryInformation() != null;
        final boolean hasDocSummary = pptExtractor.getDocSummaryInformation() != null;

        // get keywords (for yacy as array)
        final String keywords = hasSummary ? pptExtractor.getSummaryInformation().getKeywords() : null;
        final String[] keywlist;
        if (keywords != null && !keywords.isEmpty()) {
            keywlist = CommonPattern.COMMA.split(keywords);
        } else {
            keywlist = null;
        }

        final String subject = hasSummary ? pptExtractor.getSummaryInformation().getSubject() : null;
        final List<String> descriptions = new ArrayList<String>();
        if (subject != null && !subject.isEmpty()) {
            descriptions.add(subject);
        }

        final String author = hasSummary ? pptExtractor.getSummaryInformation().getAuthor() : null; // may be null
        final String company = hasDocSummary ? pptExtractor.getDocSummaryInformation().getCompany() : null;

        /*
         * create the plasmaParserDocument for the database
         * and set shortText and bodyText properly
         */
        final Document[] docs = new Document[] { new Document(location, mimeType, "UTF-8", this, null, keywlist,
                singleList(title), author, company, null, descriptions, 0.0f, 0.0f, contents,
                null, null, null, false, new Date()) };
        return docs;
    } catch (final Exception e) {
        // interruptions must reach the caller unchanged
        if (e instanceof InterruptedException) {
            throw (InterruptedException) e;
        }

        /*
         * an unexpected error occurred, log it and throw a Parser.Failure
         */
        ConcurrentLog.logException(e);
        final String errorMsg = "Unable to parse the ppt document '" + location + "':" + e.getMessage();
        AbstractParser.log.severe(errorMsg);
        throw new Parser.Failure(errorMsg, location);
    }
}