List of usage examples for the org.apache.poi.hssf.extractor.ExcelExtractor constructor
public ExcelExtractor(DirectoryNode dir) throws IOException
From source file:com.bayareasoftware.chartengine.ds.util.ExcelDump.java
License:Apache License
private static void runNew(String fileName) throws Exception { InputStream inp = new FileInputStream(fileName); HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp)); ExcelExtractor xt = new ExcelExtractor(wb); xt.setFormulasNotResults(false);//from ww w . jav a2 s.com xt.setIncludeBlankCells(true); xt.setIncludeSheetNames(false); String text = xt.getText(); String[] lines = StringUtil.splitCompletely(text, '\n'); for (int i = 0; i < lines.length; i++) { System.out.println((i + 1) + ") " + lines[i]); } System.out.println("XLS: \n" + text); }
From source file:com.docdoku.server.esindexer.ESTools.java
License:Open Source License
private static String microsoftExcelDocumentToString(InputStream inputStream) throws IOException, OpenXML4JException, XmlException { StringBuilder sb = new StringBuilder(); try (InputStream excelStream = new BufferedInputStream(inputStream)) { if (POIFSFileSystem.hasPOIFSHeader(excelStream)) { // Before 2007 format files POIFSFileSystem excelFS = new POIFSFileSystem(excelStream); ExcelExtractor excelExtractor = new ExcelExtractor(excelFS); sb.append(excelExtractor.getText()); } else { // New format XSSFWorkbook workBook = new XSSFWorkbook(excelStream); int numberOfSheets = workBook.getNumberOfSheets(); for (int i = 0; i < numberOfSheets; i++) { XSSFSheet sheet = workBook.getSheetAt(0); Iterator<Row> rowIterator = sheet.rowIterator(); while (rowIterator.hasNext()) { XSSFRow row = (XSSFRow) rowIterator.next(); Iterator<Cell> cellIterator = row.cellIterator(); while (cellIterator.hasNext()) { XSSFCell cell = (XSSFCell) cellIterator.next(); sb.append(cell.toString()); sb.append(" "); }/*from w ww.j a va 2 s. com*/ sb.append("\n"); } sb.append("\n"); } } } return sb.toString(); }
From source file:com.frameworkset.platform.cms.searchmanager.extractors.CmsExtractorMsExcel.java
License:Open Source License
/** * ?excel2003 /*from ww w .ja v a 2 s .c o m*/ * @param path * @return * @throws IOException */ public String readExcel(InputStream in) throws IOException { String content = null; try { HSSFWorkbook wb = new HSSFWorkbook(in); ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setFormulasNotResults(true); extractor.setIncludeSheetNames(false); content = extractor.getText(); this.m_documentSummary = extractor.getDocSummaryInformation(); this.m_summary = extractor.getSummaryInformation(); } catch (FileNotFoundException e) { e.printStackTrace(); } return content; }
From source file:com.isotrol.impe3.idx.oc.extractors.ExtractorMsExcel.java
License:Open Source License
/** * Extrae el texto de un fichero excel.//from w w w. ja va2 s .c om * @param in * @return String. Devuelve el texto crudo * @throws Exception */ public static String extractText(InputStream in) throws Exception { String result = ""; HSSFWorkbook wb = new HSSFWorkbook(in); ExcelExtractor ee = new ExcelExtractor(wb); result = ee.getText(); // Eliminamos los caracteres que no nos sirven para indexar. result = ExtractorUtil.removeControlChars(result); return result; }
From source file:com.jaeksoft.searchlib.parser.XlsParser.java
License:Open Source License
/**
 * Parses an XLS workbook into a new parser result item.
 *
 * <p>When summary information is available, its title, author and subject are
 * added as fields. The extracted text is added as the content field after
 * being passed through {@code StringUtils.replaceConsecutiveSpaces(text, " ")},
 * and language detection is then requested on that field with the value 10000.
 * The extractor is handed to {@code IOUtils.close} in all cases.
 *
 * @param streamLimiter source of the workbook input stream
 * @param lang          not used by this implementation
 * @throws IOException if the workbook cannot be read
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException {
    final HSSFWorkbook book = new HSSFWorkbook(streamLimiter.getNewInputStream());
    ExcelExtractor extractor = null;
    try {
        extractor = new ExcelExtractor(book);
        final ParserResultItem item = getNewParserResultItem();

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            item.addField(ParserFieldEnum.title, summary.getTitle());
            item.addField(ParserFieldEnum.author, summary.getAuthor());
            item.addField(ParserFieldEnum.subject, summary.getSubject());
        }

        final String rawText = extractor.getText();
        final String normalized = StringUtils.replaceConsecutiveSpaces(rawText, " ");
        item.addField(ParserFieldEnum.content, normalized);
        item.langDetection(10000, ParserFieldEnum.content);
    } finally {
        IOUtils.close(extractor);
    }
}
From source file:com.mycompany.devisetty_mavenlocalrepositorydemo.POIDriver.java
/** * @param args the command line arguments *//* w w w .jav a 2 s .c o m*/ public static void main(String[] args) throws IOException { // TODO code application logic here //Create POI file system object. POIFSFileSystem objPOIFileSystem = new POIFSFileSystem(new FileInputStream("sample.xls")); //Create a data extractor using file system object. ExcelExtractor datExtractor = new ExcelExtractor(objPOIFileSystem); //Extract data using extractor and print System.out.println(datExtractor.getText()); }
From source file:com.mycompany.mavenlocalrepository.POIDriver.java
public static void main(String[] args) throws FileNotFoundException, IOException { //Create POI file system object. POIFSFileSystem objPOIFileSystem = new POIFSFileSystem(new FileInputStream("Sample.xls")); //Create a data extractor using file system object. ExcelExtractor datExtractor = new ExcelExtractor(objPOIFileSystem); //Extract data using extractor and print System.out.println(datExtractor.getText()); }
From source file:com.openkm.extractor.MsExcelTextExtractor.java
License:Open Source License
/** * {@inheritDoc}/*from w w w . j a v a 2 s . c o m*/ */ public String extractText(InputStream stream, String type, String encoding) throws IOException { try { POIFSFileSystem fs = new POIFSFileSystem(stream); return new ExcelExtractor(fs).getText(); } catch (RuntimeException e) { logger.warn("Failed to extract Excel text content", e); throw new IOException(e.getMessage(), e); } finally { stream.close(); } }
From source file:com.openkm.util.metadata.MetadataExtractor.java
License:Open Source License
/** * Extract metadata from Office Word/*from w w w. j a v a2s . com*/ */ public static OfficeMetadata officeExtractor(InputStream is, String mimeType) throws IOException { POIFSFileSystem fs = new POIFSFileSystem(is); OfficeMetadata md = new OfficeMetadata(); SummaryInformation si = null; if (MimeTypeConfig.MIME_MS_WORD.equals(mimeType)) { si = new WordExtractor(fs).getSummaryInformation(); } else if (MimeTypeConfig.MIME_MS_EXCEL.equals(mimeType)) { si = new ExcelExtractor(fs).getSummaryInformation(); } else if (MimeTypeConfig.MIME_MS_POWERPOINT.equals(mimeType)) { si = new PowerPointExtractor(fs).getSummaryInformation(); } if (si != null) { md.setTitle(si.getTitle()); md.setSubject(si.getSubject()); md.setAuthor(si.getAuthor()); md.setLastAuthor(si.getLastAuthor()); md.setKeywords(si.getKeywords()); md.setComments(si.getComments()); md.setTemplate(si.getTemplate()); md.setRevNumber(si.getRevNumber()); md.setApplicationName(si.getApplicationName()); md.setEditTime(si.getEditTime()); md.setPageCount(si.getPageCount()); md.setWordCount(si.getWordCount()); md.setCharCount(si.getCharCount()); md.setSecurity(si.getSecurity()); Calendar createDateTime = Calendar.getInstance(); createDateTime.setTime(si.getCreateDateTime()); md.setCreateDateTime(createDateTime); Calendar lastSaveDateTime = Calendar.getInstance(); lastSaveDateTime.setTime(si.getLastSaveDateTime()); md.setLastSaveDateTime(lastSaveDateTime); Calendar lastPrinted = Calendar.getInstance(); lastPrinted.setTime(si.getLastPrinted()); md.setLastPrinted(lastPrinted); } log.info("officeExtractor: {}", md); return md; }
From source file:com.opensearchserver.extractor.parser.Xls.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { HSSFWorkbook workbook = new HSSFWorkbook(inputStream); ExcelExtractor excel = null;// www. j av a 2 s. co m try { excel = new ExcelExtractor(workbook); SummaryInformation info = excel.getSummaryInformation(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(AUTHOR, info.getAuthor()); metas.add(SUBJECT, info.getSubject()); metas.add(CREATION_DATE, info.getCreateDateTime()); metas.add(MODIFICATION_DATE, info.getLastSaveDateTime()); metas.add(KEYWORDS, info.getKeywords()); } ParserDocument result = getNewParserDocument(); result.add(CONTENT, excel.getText()); result.add(LANG_DETECTION, languageDetection(CONTENT, 10000)); } finally { if (excel != null) IOUtils.closeQuietly(excel); } }