Example usage for org.apache.poi.hpbf.extractor.PublisherTextExtractor#getSummaryInformation()

List of usage examples for org.apache.poi.hpbf.extractor.PublisherTextExtractor#getSummaryInformation()

Introduction

On this page you can find example usages of org.apache.poi.hpbf.extractor.PublisherTextExtractor#getSummaryInformation().

Prototype

public SummaryInformation getSummaryInformation() 

Source Link

Document

Returns the summary information metadata for the document.

Usage

From source file: com/jaeksoft/searchlib/parser/PublisherParser.java

License:Open Source License

/**
 * Parses a Publisher document read from the given stream limiter.
 * <p>
 * A new parser result item is always created. When the extractor exposes
 * summary information, its title, author and subject are added to that item.
 * The extracted text is then passed through
 * {@code StringUtils.replaceConsecutiveSpaces(text, " ")} and stored in the
 * content field, after which language detection is requested on that field.
 * The extractor is handed to {@code IOUtils.close} in all cases.
 *
 * @param streamLimiter supplies the input stream of the document to parse
 * @param lang          not used by this implementation
 * @throws IOException if opening or reading the document fails
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    PublisherTextExtractor publisherExtractor = null;
    try {
        publisherExtractor = new PublisherTextExtractor(streamLimiter.getNewInputStream());

        final SummaryInformation summary = publisherExtractor.getSummaryInformation();
        final ParserResultItem item = getNewParserResultItem();

        // Metadata is optional: only copy it when the document provides it.
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String rawText = publisherExtractor.getText();
        final String contentText = StringUtils.replaceConsecutiveSpaces(rawText, " ");
        item.addField(ParserFieldEnum.content, contentText);

        // NOTE(review): 10000 is presumably a character limit for detection - confirm.
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(publisherExtractor);
    }
}

From source file: com/opensearchserver/extractor/parser/Publisher.java

License:Apache License

/**
 * Parses a Publisher document from the given input stream.
 * <p>
 * When summary information is available, its title, author, subject,
 * creation date, last-save date, keywords and comments are added to
 * {@code metas}. A parser document holding the text and the language
 * detection result is created only when the extracted text is non-empty.
 * The extractor, if it was created, is always closed quietly.
 *
 * @param inputStream the stream containing the Publisher document
 * @param extension   not used by this implementation
 * @param mimeType    not used by this implementation
 * @throws Exception if the document cannot be opened or read
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    PublisherTextExtractor publisherExtractor = null;
    try {
        publisherExtractor = new PublisherTextExtractor(inputStream);

        final SummaryInformation summary = publisherExtractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            // NOTE(review): keywords are stored under CONTENT; a dedicated
            // keywords field may have been intended - confirm before changing.
            metas.add(CONTENT, summary.getKeywords());
            metas.add(COMMENTS, summary.getComments());
        }

        final String body = publisherExtractor.getText();
        // No document is produced for an empty text; metadata added above is kept.
        if (!StringUtils.isEmpty(body)) {
            final ParserDocument document = getNewParserDocument();
            document.add(CONTENT, body);
            // NOTE(review): 10000 is presumably a character limit for detection - confirm.
            document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        }
    } finally {
        if (publisherExtractor != null) {
            IOUtils.closeQuietly(publisherExtractor);
        }
    }
}