List of usage examples for the method org.apache.poi.xssf.extractor.XSSFExcelExtractor#setIncludeSheetNames
public void setIncludeSheetNames(boolean includeSheetNames)
From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsExcel.java
License:Open Source License
/** * ?excel2007 /* www . jav a2 s . c om*/ * @param path * @return * @throws IOException */ public String readExcel2007(InputStream in) throws IOException { // StringBuffer content = new StringBuffer(); // XSSFWorkbook strPath String content = null; XSSFWorkbook xwb = new XSSFWorkbook(in); XSSFExcelExtractor extractor = new XSSFExcelExtractor(xwb); extractor.setFormulasNotResults(true); extractor.setIncludeSheetNames(false); content = extractor.getText(); this.cp = extractor.getCoreProperties(); return content; // // Sheet // for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) { // XSSFSheet xSheet = xwb.getSheetAt(numSheet); // if (xSheet == null) { // continue; // } // // Row // for (int rowNum = 0; rowNum <= xSheet.getLastRowNum(); rowNum++) { // XSSFRow xRow = xSheet.getRow(rowNum); // if (xRow == null) { // continue; // } // // Cell // for (int cellNum = 0; cellNum <= xRow.getLastCellNum(); cellNum++) { // XSSFCell xCell = xRow.getCell(cellNum); // if (xCell == null) { // continue; // } // if (xCell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) { // content.append(xCell.getBooleanCellValue()); // } else if (xCell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) { // content.append(xCell.getNumericCellValue()); // } else { // content.append(xCell.getStringCellValue()); // } // } // } // } // // return content.toString(); }
From source file:com.jaeksoft.searchlib.parser.XlsxParser.java
License:Open Source License
/**
 * Parses an XLSX document: copies its core properties (when present) and its
 * extracted text into a freshly created parser result item, then runs language
 * detection on the content field.
 *
 * @param streamLimiter source of the input stream holding the workbook
 * @param lang not used by this implementation
 * @throws IOException if the workbook cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    XSSFWorkbook xlsx = new XSSFWorkbook(streamLimiter.getNewInputStream());
    XSSFExcelExtractor extractor = null;
    try {
        extractor = new XSSFExcelExtractor(xlsx);
        ParserResultItem item = getNewParserResultItem();

        // Document metadata is optional; only copy it when available.
        CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            item.addField(ParserFieldEnum.title, props.getTitle());
            item.addField(ParserFieldEnum.creator, props.getCreator());
            item.addField(ParserFieldEnum.subject, props.getSubject());
            item.addField(ParserFieldEnum.description, props.getDescription());
            item.addField(ParserFieldEnum.keywords, props.getKeywords());
        }

        // Include comments, headers/footers and sheet names in the extracted text.
        extractor.setIncludeCellComments(true);
        extractor.setIncludeHeadersFooters(true);
        extractor.setIncludeSheetNames(true);

        String rawText = extractor.getText();
        String collapsedText = StringUtils.replaceConsecutiveSpaces(rawText, " ");
        item.addField(ParserFieldEnum.content, collapsedText);
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }
}
From source file:com.opensearchserver.extractor.parser.Xlsx.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { XSSFWorkbook workbook = null;//from w w w . j ava 2 s. c o m XSSFExcelExtractor excelExtractor = null; try { workbook = new XSSFWorkbook(inputStream); excelExtractor = new XSSFExcelExtractor(workbook); CoreProperties info = excelExtractor.getCoreProperties(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(CREATOR, info.getCreator()); metas.add(CREATION_DATE, info.getCreated()); metas.add(MODIFICATION_DATE, info.getModified()); metas.add(SUBJECT, info.getSubject()); metas.add(DESCRIPTION, info.getDescription()); metas.add(KEYWORDS, info.getKeywords()); } ParserDocument result = getNewParserDocument(); excelExtractor.setIncludeCellComments(true); excelExtractor.setIncludeHeadersFooters(true); excelExtractor.setIncludeSheetNames(true); result.add(CONTENT, excelExtractor.getText()); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (excelExtractor != null) IOUtils.closeQuietly(excelExtractor); if (workbook != null) IOUtils.closeQuietly(workbook); } }
From source file:com.opensearchserver.textextractor.parser.Xlsx.java
License:Apache License
/**
 * Extracts metadata and text from an already opened workbook: records its core
 * properties (when present) in {@code metas}, then adds the extracted text and
 * the language detection result to a new parser document.
 *
 * @param workbook the workbook to extract from
 * @throws Exception if extraction fails
 */
private void parseContent(XSSFWorkbook workbook) throws Exception {
    XSSFExcelExtractor extractor = null;
    try {
        extractor = new XSSFExcelExtractor(workbook);

        // Document metadata is optional; only copy it when available.
        CoreProperties props = extractor.getCoreProperties();
        if (props != null) {
            metas.add(TITLE, props.getTitle());
            metas.add(CREATOR, props.getCreator());
            metas.add(CREATION_DATE, props.getCreated());
            metas.add(MODIFICATION_DATE, props.getModified());
            metas.add(SUBJECT, props.getSubject());
            metas.add(DESCRIPTION, props.getDescription());
            metas.add(KEYWORDS, props.getKeywords());
        }

        ParserDocument document = getNewParserDocument();

        // Include comments, headers/footers and sheet names in the extracted text.
        extractor.setIncludeCellComments(true);
        extractor.setIncludeHeadersFooters(true);
        extractor.setIncludeSheetNames(true);

        String text = extractor.getText();
        document.add(CONTENT, text);
        document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        if (extractor != null) {
            IOUtils.closeQuietly(extractor);
        }
    }
}