List of usage examples for org.apache.poi.xssf.eventusermodel.XSSFBReader#getSheetsData()
@Override public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException
From source file: org.apache.tika.parser.microsoft.ooxml.XSSFBExcelExtractorDecorator.java
License:Apache License
/** * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText() *///ww w . ja va2 s.co m @Override protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException { OPCPackage container = extractor.getPackage(); XSSFBSharedStringsTable strings; XSSFBReader.SheetIterator iter; XSSFBReader xssfReader; XSSFBStylesTable styles; try { xssfReader = new XSSFBReader(container); String originalPath = xssfReader.getAbsPathMetadata(); if (originalPath != null) { metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, originalPath); } styles = xssfReader.getXSSFBStylesTable(); iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData(); strings = new XSSFBSharedStringsTable(container); } catch (InvalidFormatException e) { throw new XmlException(e); } catch (OpenXML4JException oe) { throw new XmlException(oe); } while (iter.hasNext()) { InputStream stream = iter.next(); PackagePart sheetPart = iter.getSheetPart(); addDrawingHyperLinks(sheetPart); sheetParts.add(sheetPart); SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(config, xhtml); XSSFBCommentsTable comments = iter.getXSSFBSheetComments(); // Start, and output the sheet name xhtml.startElement("div"); xhtml.element("h1", iter.getSheetName()); // Extract the main sheet contents xhtml.startElement("table"); xhtml.startElement("tbody"); processSheet(sheetExtractor, comments, styles, strings, stream); xhtml.endElement("tbody"); xhtml.endElement("table"); // Output any headers and footers // (Need to process the sheet to get them, so we can't // do the headers before the contents) for (String header : sheetExtractor.headers) { extractHeaderFooter(header, xhtml); } for (String footer : sheetExtractor.footers) { extractHeaderFooter(footer, xhtml); } List<XSSFShape> shapes = iter.getShapes(); processShapes(shapes, xhtml); //for now dump sheet hyperlinks at bottom of page //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes //step 1: 
extract hyperlink info from bottom of page //step 2: process as we do now, but with cached hyperlink relationship info extractHyperLinks(sheetPart, xhtml); // All done with this sheet xhtml.endElement("div"); } }