Example usage for org.apache.poi.xssf.eventusermodel XSSFBReader getSheetsData

List of usage examples for org.apache.poi.xssf.eventusermodel XSSFBReader getSheetsData

Introduction

In this page you can find the example usage for org.apache.poi.xssf.eventusermodel XSSFBReader getSheetsData.

Prototype

@Override
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException 

Source Link

Document

Returns an Iterator which will let you get at all the different Sheets in turn.

Usage

From source file:org.apache.tika.parser.microsoft.ooxml.XSSFBExcelExtractorDecorator.java

License:Apache License

/**
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 *///ww  w . ja  va2  s.co  m
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();

    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        String originalPath = xssfReader.getAbsPathMetadata();
        if (originalPath != null) {
            metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, originalPath);
        }
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (InvalidFormatException e) {
        throw new XmlException(e);
    } catch (OpenXML4JException oe) {
        throw new XmlException(oe);
    }

    while (iter.hasNext()) {
        InputStream stream = iter.next();
        PackagePart sheetPart = iter.getSheetPart();
        addDrawingHyperLinks(sheetPart);
        sheetParts.add(sheetPart);

        SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(config, xhtml);
        XSSFBCommentsTable comments = iter.getXSSFBSheetComments();

        // Start, and output the sheet name
        xhtml.startElement("div");
        xhtml.element("h1", iter.getSheetName());

        // Extract the main sheet contents
        xhtml.startElement("table");
        xhtml.startElement("tbody");

        processSheet(sheetExtractor, comments, styles, strings, stream);

        xhtml.endElement("tbody");
        xhtml.endElement("table");

        // Output any headers and footers
        // (Need to process the sheet to get them, so we can't
        //  do the headers before the contents)
        for (String header : sheetExtractor.headers) {
            extractHeaderFooter(header, xhtml);
        }
        for (String footer : sheetExtractor.footers) {
            extractHeaderFooter(footer, xhtml);
        }
        List<XSSFShape> shapes = iter.getShapes();

        processShapes(shapes, xhtml);

        //for now dump sheet hyperlinks at bottom of page
        //consider a double-pass of the inputstream to reunite hyperlinks with cells/textboxes
        //step 1: extract hyperlink info from bottom of page
        //step 2: process as we do now, but with cached hyperlink relationship info
        extractHyperLinks(sheetPart, xhtml);
        // All done with this sheet
        xhtml.endElement("div");
    }
}