List of usage examples for org.apache.poi.xwpf.extractor XWPFWordExtractor getCoreProperties
public CoreProperties getCoreProperties()
From source file:com.jaeksoft.searchlib.parser.DocxParser.java
License:Open Source License
/**
 * Reads a DOCX document from the stream limiter and fills a new parser result item
 * with the document's core properties and its text content.
 *
 * @param streamLimiter supplies the input stream the document is read from
 * @param lang not referenced by this implementation
 * @throws IOException declared for failures while reading the document
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    ParserResultItem item = getNewParserResultItem();
    XWPFDocument docx = new XWPFDocument(streamLimiter.getNewInputStream());
    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);

        // Metadata fields are only added when the extractor returns core properties.
        CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            result_addMetadata:
            {
                item.addField(ParserFieldEnum.title, props.getTitle());
                item.addField(ParserFieldEnum.creator, props.getCreator());
                item.addField(ParserFieldEnum.subject, props.getSubject());
                item.addField(ParserFieldEnum.description, props.getDescription());
                item.addField(ParserFieldEnum.keywords, props.getKeywords());
            }
        }

        // The extracted text is passed through replaceConsecutiveSpaces with a single
        // space before being stored (presumably collapsing runs of spaces - confirm
        // against StringUtils).
        String text = extractor.getText();
        item.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(text, " "));

        // NOTE(review): 10000 looks like a sample-size limit for language detection - confirm.
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // Always release the extractor, including when extraction fails part-way.
        IOUtils.close(extractor);
    }
}
From source file:com.opensearchserver.extractor.parser.Docx.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws IOException { XWPFDocument document = new XWPFDocument(inputStream); XWPFWordExtractor word = null; try {//w w w .j a va 2 s . c o m word = new XWPFWordExtractor(document); CoreProperties info = word.getCoreProperties(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(CREATOR, info.getCreator()); metas.add(CREATION_DATE, info.getCreated()); metas.add(MODIFICATION_DATE, info.getModified()); metas.add(SUBJECT, info.getSubject()); metas.add(DESCRIPTION, info.getDescription()); metas.add(KEYWORDS, info.getKeywords()); } ParserDocument parserDocument = getNewParserDocument(); parserDocument.add(CONTENT, word.getText()); parserDocument.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { IOUtils.closeQuietly(word); } }
From source file:com.opensearchserver.textextractor.parser.Docx.java
License:Apache License
/**
 * Reads a DOCX document from the given stream, records its core properties as
 * metadata and adds its text content to a new parser document.
 *
 * @param inputStream the stream the DOCX document is read from
 * @throws IOException declared for failures while reading the document
 */
@Override
protected void parseContent(InputStream inputStream) throws IOException {
    XWPFDocument docx = new XWPFDocument(inputStream);
    XWPFWordExtractor extractor = null;
    try {
        extractor = new XWPFWordExtractor(docx);

        // Metadata is only recorded when the extractor returns core properties.
        CoreProperties coreProps = extractor.getCoreProperties();
        if (coreProps != null) {
            metas.add(TITLE, coreProps.getTitle());
            metas.add(CREATOR, coreProps.getCreator());
            metas.add(CREATION_DATE, coreProps.getCreated());
            metas.add(MODIFICATION_DATE, coreProps.getModified());
            metas.add(SUBJECT, coreProps.getSubject());
            metas.add(DESCRIPTION, coreProps.getDescription());
            metas.add(KEYWORDS, coreProps.getKeywords());
        }

        ParserDocument target = getNewParserDocument();
        target.add(CONTENT, extractor.getText());
        // NOTE(review): 10000 looks like a sample-size limit for language detection - confirm.
        target.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // Always release the extractor, including when extraction fails part-way.
        IOUtils.closeQuietly(extractor);
    }
}