List of usage examples for org.apache.poi.xssf.extractor.XSSFExcelExtractor#setIncludeHeadersFooters
public void setIncludeHeadersFooters(boolean includeHeadersFooters)
From source file:com.jaeksoft.searchlib.parser.XlsxParser.java
License:Open Source License
/**
 * Builds an XSSF workbook from the stream limiter's input stream and copies
 * its core properties and extracted text into a new parser result item.
 *
 * <p>Cell comments, headers/footers and sheet names are all switched on
 * before the text is extracted. Consecutive spaces in the extracted text are
 * replaced with a single space before the text is stored in the content field.
 *
 * @param streamLimiter supplies the input stream the workbook is read from
 * @param lang not read by this method body
 * @throws IOException declared by the overridden method; propagated from
 *         reading the workbook
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    XSSFWorkbook workbook = new XSSFWorkbook(streamLimiter.getNewInputStream());
    XSSFExcelExtractor extractor = null;
    try {
        extractor = new XSSFExcelExtractor(workbook);
        ParserResultItem item = getNewParserResultItem();

        // Document metadata is only copied when the extractor exposes core properties.
        CoreProperties coreProps = extractor.getCoreProperties();
        if (coreProps != null) {
            item.addField(ParserFieldEnum.title, coreProps.getTitle());
            item.addField(ParserFieldEnum.creator, coreProps.getCreator());
            item.addField(ParserFieldEnum.subject, coreProps.getSubject());
            item.addField(ParserFieldEnum.description, coreProps.getDescription());
            item.addField(ParserFieldEnum.keywords, coreProps.getKeywords());
        }

        // Configure the extractor before pulling the text out.
        extractor.setIncludeCellComments(true);
        extractor.setIncludeHeadersFooters(true);
        extractor.setIncludeSheetNames(true);

        String extractedText = extractor.getText();
        item.addField(ParserFieldEnum.content,
                StringUtils.replaceConsecutiveSpaces(extractedText, " "));

        // NOTE(review): 10000 is presumably a character limit for detection — confirm.
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        // extractor may still be null here if its constructor threw.
        IOUtils.close(extractor);
    }
}
From source file:com.opensearchserver.extractor.parser.Xlsx.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { XSSFWorkbook workbook = null;//from w ww.j av a2 s.co m XSSFExcelExtractor excelExtractor = null; try { workbook = new XSSFWorkbook(inputStream); excelExtractor = new XSSFExcelExtractor(workbook); CoreProperties info = excelExtractor.getCoreProperties(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(CREATOR, info.getCreator()); metas.add(CREATION_DATE, info.getCreated()); metas.add(MODIFICATION_DATE, info.getModified()); metas.add(SUBJECT, info.getSubject()); metas.add(DESCRIPTION, info.getDescription()); metas.add(KEYWORDS, info.getKeywords()); } ParserDocument result = getNewParserDocument(); excelExtractor.setIncludeCellComments(true); excelExtractor.setIncludeHeadersFooters(true); excelExtractor.setIncludeSheetNames(true); result.add(CONTENT, excelExtractor.getText()); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (excelExtractor != null) IOUtils.closeQuietly(excelExtractor); if (workbook != null) IOUtils.closeQuietly(workbook); } }
From source file:com.opensearchserver.textextractor.parser.Xlsx.java
License:Apache License
/**
 * Records the workbook's core properties in {@code metas} and adds its
 * extracted text plus a language-detection entry to a new parser document.
 *
 * <p>Cell comments, headers/footers and sheet names are all switched on
 * before the text is extracted. Only the extractor is handed to
 * {@code closeQuietly} here.
 * NOTE(review): whether closing the extractor also releases the workbook
 * depends on the POI version — confirm against the caller.
 *
 * @param workbook the already-opened workbook to extract from
 * @throws Exception anything thrown while extracting the content
 */
private void parseContent(XSSFWorkbook workbook) throws Exception {
    XSSFExcelExtractor extractor = null;
    try {
        extractor = new XSSFExcelExtractor(workbook);

        // Metadata is only recorded when the extractor exposes core properties.
        CoreProperties coreProps = extractor.getCoreProperties();
        if (coreProps != null) {
            metas.add(TITLE, coreProps.getTitle());
            metas.add(CREATOR, coreProps.getCreator());
            metas.add(CREATION_DATE, coreProps.getCreated());
            metas.add(MODIFICATION_DATE, coreProps.getModified());
            metas.add(SUBJECT, coreProps.getSubject());
            metas.add(DESCRIPTION, coreProps.getDescription());
            metas.add(KEYWORDS, coreProps.getKeywords());
        }

        ParserDocument document = getNewParserDocument();

        // Configure the extractor before pulling the text out.
        extractor.setIncludeCellComments(true);
        extractor.setIncludeHeadersFooters(true);
        extractor.setIncludeSheetNames(true);

        document.add(CONTENT, extractor.getText());
        // NOTE(review): 10000 is presumably a length limit for detection — confirm.
        document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        // extractor is null only if its constructor threw.
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }
}