List of usage examples for org.apache.poi.hpbf.extractor.PublisherTextExtractor#getSummaryInformation()
public SummaryInformation getSummaryInformation()
From source file: com.jaeksoft.searchlib.parser.PublisherParser.java
License: Open Source License
@Override protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException { PublisherTextExtractor extractor = null; try {// ww w. ja v a 2 s. c om extractor = new PublisherTextExtractor(streamLimiter.getNewInputStream()); SummaryInformation info = extractor.getSummaryInformation(); ParserResultItem result = getNewParserResultItem(); if (info != null) { result.addField(ParserFieldEnum.title, info.getTitle()); result.addField(ParserFieldEnum.author, info.getAuthor()); result.addField(ParserFieldEnum.subject, info.getSubject()); } result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(extractor.getText(), " ")); result.langDetection(10000, ParserFieldEnum.content); } finally { IOUtils.close(extractor); } }
From source file: com.opensearchserver.extractor.parser.Publisher.java
License: Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { PublisherTextExtractor extractor = null; try {// w ww . ja v a 2 s .c om extractor = new PublisherTextExtractor(inputStream); SummaryInformation info = extractor.getSummaryInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(AUTHOR, info.getAuthor()); metas.add(SUBJECT, info.getSubject()); metas.add(CREATION_DATE, info.getCreateDateTime()); metas.add(MODIFICATION_DATE, info.getLastSaveDateTime()); metas.add(CONTENT, info.getKeywords()); metas.add(COMMENTS, info.getComments()); } String text = extractor.getText(); if (StringUtils.isEmpty(text)) return; ParserDocument result = getNewParserDocument(); result.add(CONTENT, text); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (extractor != null) IOUtils.closeQuietly(extractor); } }