Example usage for org.apache.poi.hssf.extractor OldExcelExtractor OldExcelExtractor

List of usage examples for the org.apache.poi.hssf.extractor.OldExcelExtractor constructor

Introduction

On this page you can find example usages of the org.apache.poi.hssf.extractor.OldExcelExtractor constructor.

Prototype

public OldExcelExtractor(DirectoryNode directory) throws IOException 

Source Link

Usage

From source file:org.apache.tika.parser.microsoft.ExcelExtractor.java

License:Apache License

/**
 * Extracts the text of an Excel workbook held in the given directory.
 *
 * <p>When the directory has no {@code WORKBOOK_ENTRY}, it is either handed to
 * the old-format (Excel 5 / Excel 95) parser, if a {@code BOOK_ENTRY} is
 * present, or skipped without extracting anything. Otherwise the workbook is
 * processed in event mode, and afterwards every child directory whose name
 * starts with {@code "MBD"} is passed to {@code handleEmbeddedOfficeDoc}.
 *
 * @param root   directory to read the workbook from
 * @param xhtml  handler that receives the extracted content
 * @param locale locale handed to the event-mode listener
 * @throws IOException   if one of the calls made here raises it
 * @throws SAXException  if one of the calls made here raises it
 * @throws TikaException if one of the calls made here raises it; a
 *                       {@code TikaException} coming from an embedded
 *                       document is caught and ignored instead
 */
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml, Locale locale)
        throws IOException, SAXException, TikaException {
    if (!root.hasEntry(WORKBOOK_ENTRY)) {
        if (!root.hasEntry(BOOK_ENTRY)) {
            // Corrupt file / very old file: skip text extraction altogether
            return;
        }
        // Excel 5 / Excel 95 file: its records use a different structure,
        //  so a different parser has to process them
        OldExcelParser.parse(new OldExcelExtractor(root), xhtml);
        return;
    }

    // Use the supplied password if there is one, otherwise the default
    Biff8EncryptionKey.setCurrentUserPassword(getPassword());

    // Process the file in event mode
    TikaHSSFListener hssfListener = new TikaHSSFListener(xhtml, locale, this);
    hssfListener.processFile(root, isListenForAllRecords());
    hssfListener.throwStoredException();

    // Hand each "MBD"-prefixed child directory to the embedded-document handler
    for (Entry child : root) {
        if (!child.getName().startsWith("MBD") || !(child instanceof DirectoryEntry)) {
            continue;
        }
        try {
            handleEmbeddedOfficeDoc((DirectoryEntry) child, xhtml);
        } catch (TikaException ignored) {
            // ignore parse errors from embedded documents
        }
    }
}

From source file:org.apache.tika.parser.microsoft.OldExcelParser.java

License:Apache License

/**
 * Extracts the text of an old-format Excel document read from an input stream
 * and forwards it to the given content handler.
 *
 * <p>No metadata is populated here, as these old formats do not store any.
 *
 * @param stream   stream the document is read from
 * @param handler  handler wrapped in an {@code XHTMLContentHandler} to receive the text
 * @param metadata metadata passed through to the {@code XHTMLContentHandler}
 * @param context  parse context; not used by this method
 * @throws IOException   if one of the calls made here raises it
 * @throws SAXException  if one of the calls made here raises it
 * @throws TikaException if one of the calls made here raises it
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // TODO Get the version and type, to set as the Content Type

    // Open the POI provided extractor first, then wrap the handler, and let
    //  the extractor-based overload push the text into it
    parse(new OldExcelExtractor(stream), new XHTMLContentHandler(handler, metadata));
}