Example usage for org.apache.poi.hslf.usermodel HSLFTextParagraph getRunType

List of usage examples for org.apache.poi.hslf.usermodel.HSLFTextParagraph.getRunType()

Introduction

On this page you can find usage examples for org.apache.poi.hslf.usermodel.HSLFTextParagraph.getRunType().

Prototype

public int getRunType() 

Source Link

Document

Returns the type of the text, from the TextHeaderAtom.

Usage

From source file:com.jaeksoft.searchlib.parser.PptParser.java

License:Open Source License

/**
 * Reads a PowerPoint (HSLF) presentation and stores the text of every slide in a single
 * parser result item.
 *
 * <p>For each paragraph the target field is chosen from the paragraph's run type
 * (title, note, body, or other). The raw text of the paragraph's runs is concatenated,
 * each run followed by one space, and the result is passed through
 * {@code StringUtils.replaceConsecutiveSpaces} before being added to the field.
 *
 * <p>The slide show is opened in a try-with-resources statement so that it is closed
 * when parsing finishes, whether normally or with an exception.
 *
 * @param streamLimiter source of the input stream the presentation is read from
 * @param lang not used by this implementation
 * @throws IOException if the presentation cannot be read or closed
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {

    try (HSLFSlideShow ppt = new HSLFSlideShow(streamLimiter.getNewInputStream())) {
        List<HSLFSlide> slides = ppt.getSlides();
        ParserResultItem result = getNewParserResultItem();
        for (HSLFSlide slide : slides) {
            // Outer list: one entry per text shape; inner list: that shape's paragraphs.
            for (List<HSLFTextParagraph> shapeParagraphs : slide.getTextParagraphs()) {
                for (HSLFTextParagraph paragraph : shapeParagraphs) {
                    ParserFieldEnum field;
                    switch (paragraph.getRunType()) {
                    case TextHeaderAtom.TITLE_TYPE:
                    case TextHeaderAtom.CENTER_TITLE_TYPE:
                        field = ParserFieldEnum.title;
                        break;
                    case TextHeaderAtom.NOTES_TYPE:
                        field = ParserFieldEnum.note;
                        break;
                    case TextHeaderAtom.BODY_TYPE:
                    case TextHeaderAtom.CENTRE_BODY_TYPE:
                    case TextHeaderAtom.HALF_BODY_TYPE:
                    case TextHeaderAtom.QUARTER_BODY_TYPE:
                        field = ParserFieldEnum.body;
                        break;
                    case TextHeaderAtom.OTHER_TYPE:
                    default:
                        // Unknown run types are treated like OTHER_TYPE.
                        field = ParserFieldEnum.other;
                        break;
                    }
                    StringBuilder text = new StringBuilder();
                    for (HSLFTextRun run : paragraph.getTextRuns()) {
                        text.append(run.getRawText());
                        text.append(' ');
                    }
                    result.addField(field, StringUtils.replaceConsecutiveSpaces(text.toString(), " "));
                }
            }
        }
        // NOTE(review): 10000 is presumably a character limit for detection; confirm.
        result.langDetection(10000, ParserFieldEnum.body);
    }
}

From source file:com.qwazr.extractor.parser.Ppt.java

License:Apache License

/**
 * Reads a PowerPoint (HSLF) presentation and creates one parser document per slide.
 *
 * <p>For each paragraph the target field ({@code TITLE}, {@code NOTES}, {@code BODY} or
 * {@code OTHER}) is chosen from the paragraph's run type. The raw text of the paragraph's
 * runs is concatenated, each run followed by one space, then trimmed and added to that
 * field. After a slide has been processed, the result of {@code languageDetection} on
 * its {@code BODY} field is added under {@code LANG_DETECTION}.
 *
 * <p>The slide show is opened in a try-with-resources statement so that it is closed
 * when parsing finishes, whether normally or with an exception.
 *
 * @param inputStream stream the presentation is read from
 * @param extension not used by this implementation
 * @param mimeType not used by this implementation
 * @throws Exception if the presentation cannot be read, parsed or closed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {

    try (HSLFSlideShow ppt = new HSLFSlideShow(inputStream)) {
        for (HSLFSlide slide : ppt.getSlides()) {
            ParserDocument document = getNewParserDocument();
            // Outer list: one entry per text shape; inner list: that shape's paragraphs.
            for (List<HSLFTextParagraph> shapeParagraphs : slide.getTextParagraphs()) {
                for (HSLFTextParagraph paragraph : shapeParagraphs) {
                    ParserField parserField;
                    switch (paragraph.getRunType()) {
                    case TextHeaderAtom.TITLE_TYPE:
                    case TextHeaderAtom.CENTER_TITLE_TYPE:
                        parserField = TITLE;
                        break;
                    case TextHeaderAtom.NOTES_TYPE:
                        parserField = NOTES;
                        break;
                    case TextHeaderAtom.BODY_TYPE:
                    case TextHeaderAtom.CENTRE_BODY_TYPE:
                    case TextHeaderAtom.HALF_BODY_TYPE:
                    case TextHeaderAtom.QUARTER_BODY_TYPE:
                        parserField = BODY;
                        break;
                    case TextHeaderAtom.OTHER_TYPE:
                    default:
                        // Unknown run types are treated like OTHER_TYPE.
                        parserField = OTHER;
                        break;
                    }
                    StringBuilder text = new StringBuilder();
                    for (HSLFTextRun run : paragraph.getTextRuns()) {
                        text.append(run.getRawText());
                        text.append(' ');
                    }
                    document.add(parserField, text.toString().trim());
                }
            }
            // NOTE(review): 10000 is presumably a character limit for detection; confirm.
            document.add(LANG_DETECTION, languageDetection(document, BODY, 10000));
        }
    }

}

From source file:com.qwazr.library.poi.PptParser.java

License:Apache License

/**
 * Reads a PowerPoint (HSLF) presentation and creates one result document per slide.
 *
 * <p>The {@code MIME_TYPE} meta is set from {@code findMimeType}. For each paragraph the
 * target field ({@code TITLE}, {@code NOTES}, {@code BODY} or {@code OTHER}) is chosen
 * from the paragraph's run type. The raw text of the paragraph's runs is concatenated,
 * each run followed by one space, then trimmed and added to that field. Every paragraph
 * that is not a title is additionally added to {@code CONTENT}. After a slide has been
 * processed, the result of {@code languageDetection} on its {@code CONTENT} field is
 * added under {@code LANG_DETECTION}.
 *
 * <p>The slide show is opened in a try-with-resources statement so that it is closed
 * when parsing finishes, whether normally or with an exception.
 *
 * @param parameters not used by this implementation
 * @param inputStream stream the presentation is read from
 * @param extension file extension, forwarded to {@code findMimeType}
 * @param mimeType MIME type, forwarded to {@code findMimeType}
 * @param resultBuilder receives the metas and the per-slide documents
 * @throws Exception if the presentation cannot be read, parsed or closed
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws Exception {

    try (final HSLFSlideShow ppt = new HSLFSlideShow(inputStream)) {

        final ParserFieldsBuilder metas = resultBuilder.metas();
        metas.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

        for (final HSLFSlide slide : ppt.getSlides()) {
            final ParserFieldsBuilder document = resultBuilder.newDocument();
            // Outer list: one entry per text shape; inner list: that shape's paragraphs.
            for (final List<HSLFTextParagraph> shapeParagraphs : slide.getTextParagraphs()) {
                for (final HSLFTextParagraph paragraph : shapeParagraphs) {
                    final ParserField parserField;
                    switch (paragraph.getRunType()) {
                    case TextHeaderAtom.TITLE_TYPE:
                    case TextHeaderAtom.CENTER_TITLE_TYPE:
                        parserField = TITLE;
                        break;
                    case TextHeaderAtom.NOTES_TYPE:
                        parserField = NOTES;
                        break;
                    case TextHeaderAtom.BODY_TYPE:
                    case TextHeaderAtom.CENTRE_BODY_TYPE:
                    case TextHeaderAtom.HALF_BODY_TYPE:
                    case TextHeaderAtom.QUARTER_BODY_TYPE:
                        parserField = BODY;
                        break;
                    case TextHeaderAtom.OTHER_TYPE:
                    default:
                        // Unknown run types are treated like OTHER_TYPE.
                        parserField = OTHER;
                        break;
                    }
                    final StringBuilder joined = new StringBuilder();
                    for (final HSLFTextRun run : paragraph.getTextRuns()) {
                        joined.append(run.getRawText());
                        joined.append(' ');
                    }
                    final String text = joined.toString().trim();
                    document.add(parserField, text);
                    // Titles are kept out of CONTENT; everything else is mirrored there.
                    if (parserField != TITLE) {
                        document.add(CONTENT, text);
                    }
                }
            }
            // NOTE(review): 10000 is presumably a character limit for detection; confirm.
            document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000));
        }
    }

}