Example usage for org.apache.poi.xssf.eventusermodel XSSFBReader getAbsPathMetadata

List of usage examples for org.apache.poi.xssf.eventusermodel XSSFBReader getAbsPathMetadata

Introduction

On this page you can find example usage for org.apache.poi.xssf.eventusermodel XSSFBReader getAbsPathMetadata.

Prototype

public String getAbsPathMetadata() throws IOException 

Source Link

Document

In Excel 2013, the absolute path where the file was last saved may be stored in the XSSFBRecordType#BrtAbsPath15 record.

Usage

From source file: org.apache.tika.parser.microsoft.ooxml.XSSFBExcelExtractorDecorator.java

License:Apache License

/**
 * Writes the contents of every sheet in the workbook package to the given XHTML handler.
 *
 * <p>Before any sheet is processed, the absolute-path metadata reported by the
 * {@link XSSFBReader} (if non-null) is stored in {@code metadata} under
 * {@link TikaCoreProperties#ORIGINAL_RESOURCE_NAME}.
 *
 * <p>Each sheet is emitted as a {@code div} containing, in order: an {@code h1} with the
 * sheet name, a {@code table}/{@code tbody} with the sheet contents, the headers and
 * footers collected while processing the sheet, the sheet's shapes, and finally the
 * sheet's hyperlinks.
 *
 * @param xhtml handler that receives the generated XHTML events
 * @throws SAXException if the handler rejects an event
 * @throws XmlException if the package cannot be opened as a workbook; wraps the
 *                      underlying {@code InvalidFormatException} / {@code OpenXML4JException}
 * @throws IOException  if reading the package, or closing a sheet stream, fails
 * @see org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor#getText()
 */
@Override
protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    OPCPackage container = extractor.getPackage();

    XSSFBSharedStringsTable strings;
    XSSFBReader.SheetIterator iter;
    XSSFBReader xssfReader;
    XSSFBStylesTable styles;
    try {
        xssfReader = new XSSFBReader(container);
        String originalPath = xssfReader.getAbsPathMetadata();
        if (originalPath != null) {
            metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, originalPath);
        }
        styles = xssfReader.getXSSFBStylesTable();
        iter = (XSSFBReader.SheetIterator) xssfReader.getSheetsData();
        strings = new XSSFBSharedStringsTable(container);
    } catch (InvalidFormatException e) {
        throw new XmlException(e);
    } catch (OpenXML4JException oe) {
        throw new XmlException(oe);
    }

    while (iter.hasNext()) {
        // The sheet stream is closed when this sheet is finished, including when
        // processing fails part-way. Closeable.close() is specified as a no-op on an
        // already-closed stream, so this is safe even if a callee closes it as well.
        try (InputStream stream = iter.next()) {
            PackagePart sheetPart = iter.getSheetPart();
            addDrawingHyperLinks(sheetPart);
            sheetParts.add(sheetPart);

            SheetTextAsHTML sheetExtractor = new SheetTextAsHTML(config, xhtml);
            XSSFBCommentsTable comments = iter.getXSSFBSheetComments();

            // Start, and output the sheet name
            xhtml.startElement("div");
            xhtml.element("h1", iter.getSheetName());

            // Extract the main sheet contents
            xhtml.startElement("table");
            xhtml.startElement("tbody");

            processSheet(sheetExtractor, comments, styles, strings, stream);

            xhtml.endElement("tbody");
            xhtml.endElement("table");

            // Output any headers and footers
            // (Need to process the sheet to get them, so we can't
            //  do the headers before the contents)
            for (String header : sheetExtractor.headers) {
                extractHeaderFooter(header, xhtml);
            }
            for (String footer : sheetExtractor.footers) {
                extractHeaderFooter(footer, xhtml);
            }

            List<XSSFShape> shapes = iter.getShapes();
            processShapes(shapes, xhtml);

            // For now, dump sheet hyperlinks at the bottom of the page.
            // Consider a double pass over the input stream to reunite hyperlinks
            // with cells/textboxes:
            //   step 1: extract hyperlink info from bottom of page
            //   step 2: process as we do now, but with cached hyperlink relationship info
            extractHyperLinks(sheetPart, xhtml);

            // All done with this sheet
            xhtml.endElement("div");
        }
    }
}