List of usage examples for org.apache.poi.hssf.extractor ExcelExtractor getText
@Override
public String getText()
From source file:com.bayareasoftware.chartengine.ds.util.ExcelDump.java
License:Apache License
private static void runNew(String fileName) throws Exception { InputStream inp = new FileInputStream(fileName); HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp)); ExcelExtractor xt = new ExcelExtractor(wb); xt.setFormulasNotResults(false);//from w w w .j a va2 s . com xt.setIncludeBlankCells(true); xt.setIncludeSheetNames(false); String text = xt.getText(); String[] lines = StringUtil.splitCompletely(text, '\n'); for (int i = 0; i < lines.length; i++) { System.out.println((i + 1) + ") " + lines[i]); } System.out.println("XLS: \n" + text); }
From source file:com.docdoku.server.esindexer.ESTools.java
License:Open Source License
private static String microsoftExcelDocumentToString(InputStream inputStream) throws IOException, OpenXML4JException, XmlException { StringBuilder sb = new StringBuilder(); try (InputStream excelStream = new BufferedInputStream(inputStream)) { if (POIFSFileSystem.hasPOIFSHeader(excelStream)) { // Before 2007 format files POIFSFileSystem excelFS = new POIFSFileSystem(excelStream); ExcelExtractor excelExtractor = new ExcelExtractor(excelFS); sb.append(excelExtractor.getText()); } else { // New format XSSFWorkbook workBook = new XSSFWorkbook(excelStream); int numberOfSheets = workBook.getNumberOfSheets(); for (int i = 0; i < numberOfSheets; i++) { XSSFSheet sheet = workBook.getSheetAt(0); Iterator<Row> rowIterator = sheet.rowIterator(); while (rowIterator.hasNext()) { XSSFRow row = (XSSFRow) rowIterator.next(); Iterator<Cell> cellIterator = row.cellIterator(); while (cellIterator.hasNext()) { XSSFCell cell = (XSSFCell) cellIterator.next(); sb.append(cell.toString()); sb.append(" "); }/*from w w w. ja v a 2 s. co m*/ sb.append("\n"); } sb.append("\n"); } } } return sb.toString(); }
From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsExcel.java
License:Open Source License
/**
 * Reads the text of an Excel 2003 ({@code .xls}) workbook and stores its summary
 * metadata in {@code m_documentSummary} and {@code m_summary}.
 *
 * <p>The extractor is configured with {@code setFormulasNotResults(true)} and
 * {@code setIncludeSheetNames(false)}.
 *
 * @param in stream holding the workbook content
 * @return the extracted text, or {@code null} if a {@link FileNotFoundException} was
 *         caught (its stack trace is printed)
 * @throws IOException on any other I/O failure while reading the workbook
 */
public String readExcel(InputStream in) throws IOException {
    String text = null;
    try {
        ExcelExtractor xls = new ExcelExtractor(new HSSFWorkbook(in));
        xls.setFormulasNotResults(true);
        xls.setIncludeSheetNames(false);

        text = xls.getText();
        m_documentSummary = xls.getDocSummaryInformation();
        m_summary = xls.getSummaryInformation();
    } catch (FileNotFoundException notFound) {
        notFound.printStackTrace();
    }
    return text;
}
From source file:com.isotrol.impe3.idx.oc.extractors.ExtractorMsExcel.java
License:Open Source License
/** * Extrae el texto de un fichero excel./*from ww w. j a v a2 s .co m*/ * @param in * @return String. Devuelve el texto crudo * @throws Exception */ public static String extractText(InputStream in) throws Exception { String result = ""; HSSFWorkbook wb = new HSSFWorkbook(in); ExcelExtractor ee = new ExcelExtractor(wb); result = ee.getText(); // Eliminamos los caracteres que no nos sirven para indexar. result = ExtractorUtil.removeControlChars(result); return result; }
From source file:com.jaeksoft.searchlib.parser.XlsParser.java
License:Open Source License
@Override protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException { HSSFWorkbook workbook = new HSSFWorkbook(streamLimiter.getNewInputStream()); ExcelExtractor excel = null; try {//from ww w.j a v a 2 s . c o m excel = new ExcelExtractor(workbook); ParserResultItem result = getNewParserResultItem(); SummaryInformation info = excel.getSummaryInformation(); if (info != null) { result.addField(ParserFieldEnum.title, info.getTitle()); result.addField(ParserFieldEnum.author, info.getAuthor()); result.addField(ParserFieldEnum.subject, info.getSubject()); } String content = excel.getText(); result.addField(ParserFieldEnum.content, StringUtils.replaceConsecutiveSpaces(content, " ")); result.langDetection(10000, ParserFieldEnum.content); } finally { IOUtils.close(excel); } }
From source file:com.mycompany.devisetty_mavenlocalrepositorydemo.POIDriver.java
/** * @param args the command line arguments *//*from w w w.j a v a2 s.c o m*/ public static void main(String[] args) throws IOException { // TODO code application logic here //Create POI file system object. POIFSFileSystem objPOIFileSystem = new POIFSFileSystem(new FileInputStream("sample.xls")); //Create a data extractor using file system object. ExcelExtractor datExtractor = new ExcelExtractor(objPOIFileSystem); //Extract data using extractor and print System.out.println(datExtractor.getText()); }
From source file:com.mycompany.mavenlocalrepository.POIDriver.java
public static void main(String[] args) throws FileNotFoundException, IOException { //Create POI file system object. POIFSFileSystem objPOIFileSystem = new POIFSFileSystem(new FileInputStream("Sample.xls")); //Create a data extractor using file system object. ExcelExtractor datExtractor = new ExcelExtractor(objPOIFileSystem); //Extract data using extractor and print System.out.println(datExtractor.getText()); }
From source file:com.opensearchserver.extractor.parser.Xls.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { HSSFWorkbook workbook = new HSSFWorkbook(inputStream); ExcelExtractor excel = null; try {//from ww w.j av a 2 s .com excel = new ExcelExtractor(workbook); SummaryInformation info = excel.getSummaryInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(AUTHOR, info.getAuthor()); metas.add(SUBJECT, info.getSubject()); metas.add(CREATION_DATE, info.getCreateDateTime()); metas.add(MODIFICATION_DATE, info.getLastSaveDateTime()); metas.add(KEYWORDS, info.getKeywords()); } ParserDocument result = getNewParserDocument(); result.add(CONTENT, excel.getText()); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (excel != null) IOUtils.closeQuietly(excel); } }
From source file:com.opensearchserver.textextractor.parser.Xls.java
License:Apache License
/**
 * Parses an {@code .xls} workbook: copies the summary information (title, author,
 * subject, creation date, last-save date, keywords) into {@code metas} when it is
 * present, then stores the extracted text and the language detection result in a new
 * parser document.
 *
 * @param inputStream the workbook content
 * @throws Exception if the workbook cannot be read or parsed
 */
@Override
protected void parseContent(InputStream inputStream) throws Exception {
    HSSFWorkbook xlsBook = new HSSFWorkbook(inputStream);
    // Created ahead of the try: if construction fails there is nothing to close.
    ExcelExtractor extractor = new ExcelExtractor(xlsBook);
    try {
        SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metas.add(TITLE, summary.getTitle());
            metas.add(AUTHOR, summary.getAuthor());
            metas.add(SUBJECT, summary.getSubject());
            metas.add(CREATION_DATE, summary.getCreateDateTime());
            metas.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metas.add(KEYWORDS, summary.getKeywords());
        }

        ParserDocument document = getNewParserDocument();
        document.add(CONTENT, extractor.getText());
        document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
    } finally {
        IOUtils.closeQuietly(extractor);
    }
}
From source file:de.micromata.genome.gwiki.plugin.msotextextractor_1_0.ExcelTextExtractor.java
License:Apache License
/**
 * Extracts the text of an {@code .xls} workbook and post-processes it with
 * {@code WordTextExtractor.reworkWordText}.
 *
 * @param fileName name of the source file; only used in the error message
 * @param data the workbook content
 * @return the reworked text
 * @throws RuntimeIOException wrapping any {@link IOException} raised while reading
 */
public String extractText(String fileName, InputStream data) {
    try {
        ExcelExtractor extractor = new ExcelExtractor(new HSSFWorkbook(data));
        return WordTextExtractor.reworkWordText(extractor.getText());
    } catch (IOException ex) {
        // NOTE(review): the message says "word" although this extracts Excel content -
        // possibly copied from the Word extractor; kept verbatim.
        throw new RuntimeIOException("Failure to extract word from " + fileName + "; " + ex.getMessage(), ex);
    }
}