com.bayareasoftware.chartengine.ds.util.XLS2Data.java Source code

Java tutorial

Introduction

Here is the source code for com.bayareasoftware.chartengine.ds.util.XLS2Data.java

Source

/*
 * Copyright 2008-2010 Bay Area Software, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. 
 */
package com.bayareasoftware.chartengine.ds.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.HSSFUserException;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

//import com.bayareasoftware.chartengine.ds.DSTestUtil;

//import com.bayareasoftware.chartengine.ds.DSTestUtil;

public class XLS2Data implements HSSFListener {
    private int maxRows;
    private POIFSFileSystem fs;

    private int lastRowNumber;
    private int lastColumnNumber;

    /** limit data only to specified sheet name **/
    private String requiredSheetName;
    /* the name of the sheet we're currently parsing */
    private String currentSheetName;

    /** Should we output the formula, or the value it has? */
    private boolean outputFormulaValues = true;

    /** For parsing Formulas */
    private SheetRecordCollectingListener workbookBuildingListener;
    private HSSFWorkbook stubWorkbook;

    // Records we pick up as we process
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;

    /** So we known which sheet we're on */
    private int sheetIndex = -1;
    private BoundSheetRecord[] orderedBSRs;
    private ArrayList boundSheetRecords = new ArrayList();

    // For handling formulas with string results
    private int nextRow;
    private int nextColumn;
    private boolean outputNextStringRecord;
    private Map<String, List<String[]>> sheet2data = new HashMap<String, List<String[]>>();
    private List<String[]> currentData = new ArrayList<String[]>();
    private List<String> currentRow = new ArrayList<String>();

    /**
     * Creates a new XLS -> Data List converter
     * 
     * @param fs
     *      The POIFSFileSystem to process
     * @param minColumns
     *      The minimum number of columns to output, or -1 for no minimum
     */
    private XLS2Data(POIFSFileSystem fs, String sheetName, int maxRows) {
        this.fs = fs;
        this.maxRows = maxRows;
        this.requiredSheetName = sheetName;
    }

    private XLS2Data(InputStream is, String sheetName) throws IOException {
        this(is, sheetName, Integer.MAX_VALUE);
    }

    /**
     * Creates a new XLS -> CSV converter
     * 
     * @param filename
     *      The file to process
     * @param minColumns
     *      The minimum number of columns to output, or -1 for no minimum
     * @throws IOException
     * @throws FileNotFoundException
     */
    public XLS2Data(InputStream is, String sheetName, int maxRows) throws IOException {
        this(new POIFSFileSystem(is), sheetName, maxRows);
    }

    private void startNewSheet() {
        if (currentSheetName != null) {
            sheet2data.put(this.currentSheetName, currentData);
        }
        currentData = new ArrayList<String[]>();
    }

    private void startNewRow() {
        int sz = currentRow.size();
        String[] sa = new String[sz];
        currentRow.toArray(sa);
        // convert empty strings to null
        for (int i = 0; i < sz; i++) {
            if (sa[i].length() == 0) {
                sa[i] = null;
            }
        }
        if (currentData.size() < maxRows) {
            currentData.add(sa);
        }
        currentRow.clear();
        if (currentData.size() >= maxRows) {
            //throw new HSSFUserException("max rows limit reached: " + maxRows);            
            //throw new RuntimeException("max rows limit reached: " + maxRows);
            throw new MaxRowsReachedException(maxRows);
        }

    }

    private static class MaxRowsReachedException extends RuntimeException {
        MaxRowsReachedException(int num) {
            super("maximum rows reached: " + num);
        }
    }

    private void appendCell(String value) {
        if (value == null) {
            value = "";
        }
        currentRow.add(value);
    }

    public List<String[]> getData() throws Exception {
        List<String[]> ret = null;
        // return specified sheet name, or else first sheet
        if (this.requiredSheetName != null) {
            ret = getData(requiredSheetName);
            if (ret == null) {
                throw new Exception("Required sheet name of: " + requiredSheetName + " is missing in spreadsheet");
            }
        } else {
            //        if (ret == null) {
            List<String> names = getSheetNames();
            if (names.size() > 0) {
                ret = getData(names.get(0));
            }
        }
        return ret;
    }

    public List<String[]> getData(String sheetName) {
        return sheet2data.get(sheetName);
    }

    public List<String> getSheetNames() {
        List<String> ret = new ArrayList<String>();
        if (this.orderedBSRs != null) {
            for (BoundSheetRecord bsr : orderedBSRs) {
                ret.add(bsr.getSheetname());
            }
        }
        return ret;
    }

    public void process() throws IOException {
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();

        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }
        try {
            // we can handle this when we only want to read a particular sheet
            // and use this exception when we've moved beyond that sheet
            factory.abortableProcessWorkbookEvents(request, fs);
        } catch (HSSFUserException hssue) {
            hssue.printStackTrace();
        } catch (MaxRowsReachedException re) {
            System.out.println("aborted? " + re);
        }
        // make sure we got the last sheet
        if (currentSheetName != null && sheet2data.get(currentSheetName) == null) {
            sheet2data.put(currentSheetName, currentData);
        }
        //factory.processWorkbookEvents(request, fs);        
    }

    /**
     * Main HSSFListener method, processes events, and outputs the CSV as the
     * file is processed.
     */
    public void processRecord(Record record) {
        int thisRow = -1;
        int thisColumn = -1;
        String thisStr = null;

        //p("looking at " + record.getClass().getName() + " sid=0x" + Integer.toHexString(record.getSid()));
        if ((record.getSid() != BoundSheetRecord.sid && record.getSid() != BOFRecord.sid)
                && currentSheetName != null && this.requiredSheetName != null
                && !this.requiredSheetName.equals(currentSheetName)) {
            return;
        }
        switch (record.getSid()) {
        case BoundSheetRecord.sid:
            boundSheetRecords.add(record);
            break;
        case BOFRecord.sid:
            BOFRecord br = (BOFRecord) record;
            if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                // Create sub workbook if required
                if (workbookBuildingListener != null && stubWorkbook == null) {
                    stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
                }

                // Output the worksheet name
                // Works by ordering the BSRs by the location of
                // their BOFRecords, and then knowing that we
                // process BOFRecords in byte offset order
                sheetIndex++;
                if (orderedBSRs == null) {
                    orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
                }
                //output.println();
                startNewSheet();
                currentSheetName = orderedBSRs[sheetIndex].getSheetname();
                //System.err.println("got sheet name '" + currentSheetName + "' required='"
                //      + this.requiredSheetName + "'");
                /*if (requiredSheetName == null) {
                startNewRow();
                //output.println(orderedBSRs[sheetIndex].getSheetname() + " ["
                //+ (sheetIndex + 1) + "]:");
                appendCell(currentSheetName + " ["
                        + (sheetIndex + 1) + "]:");
                startNewRow();
                }
                */
            } else if (br.getType() == BOFRecord.TYPE_CHART) {
                //p("got chart");
            }
            break;

        case SSTRecord.sid:
            sstRecord = (SSTRecord) record;
            break;

        case BlankRecord.sid:
            BlankRecord brec = (BlankRecord) record;

            thisRow = brec.getRow();
            thisColumn = brec.getColumn();
            thisStr = "";
            break;
        case BoolErrRecord.sid:
            BoolErrRecord berec = (BoolErrRecord) record;

            thisRow = berec.getRow();
            thisColumn = berec.getColumn();
            thisStr = "";
            break;

        case FormulaRecord.sid:
            FormulaRecord frec = (FormulaRecord) record;

            thisRow = frec.getRow();
            thisColumn = frec.getColumn();

            if (outputFormulaValues) {
                if (Double.isNaN(frec.getValue())) {
                    // Formula result is a string
                    // This is stored in the next record
                    outputNextStringRecord = true;
                    nextRow = frec.getRow();
                    nextColumn = frec.getColumn();
                } else {
                    thisStr = "" + frec.getValue();
                    //thisStr = formatListener.formatNumberDateCell(frec);
                }
            } else {
                thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
            }
            break;
        case StringRecord.sid:
            if (outputNextStringRecord) {
                // String for formula
                StringRecord srec = (StringRecord) record;
                thisStr = srec.getString();
                thisRow = nextRow;
                thisColumn = nextColumn;
                outputNextStringRecord = false;
            }
            break;

        case LabelRecord.sid:
            LabelRecord lrec = (LabelRecord) record;

            thisRow = lrec.getRow();
            thisColumn = lrec.getColumn();
            //thisStr = '"' + lrec.getValue() + '"';
            thisStr = lrec.getValue();
            break;
        case LabelSSTRecord.sid:
            LabelSSTRecord lsrec = (LabelSSTRecord) record;

            thisRow = lsrec.getRow();
            thisColumn = lsrec.getColumn();
            if (sstRecord == null) {
                thisStr = '"' + "(No SST Record, can't identify string)" + '"';
            } else {
                //thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex())
                //      .toString() + '"';
                thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString();
            }
            break;
        case NoteRecord.sid:
            NoteRecord nrec = (NoteRecord) record;

            thisRow = nrec.getRow();
            thisColumn = nrec.getColumn();
            // TODO: Find object to match nrec.getShapeId()
            thisStr = '"' + "(TODO - NoteRecord (" + thisRow + "," + thisColumn + "))" + '"';
            break;
        case NumberRecord.sid:
            NumberRecord numrec = (NumberRecord) record;

            thisRow = numrec.getRow();
            thisColumn = numrec.getColumn();

            // Format
            thisStr = formatListener.formatNumberDateCell(numrec);
            break;
        case RKRecord.sid:
            RKRecord rkrec = (RKRecord) record;

            thisRow = rkrec.getRow();
            thisColumn = rkrec.getColumn();
            thisStr = '"' + "(TODO - RKRecord (" + thisRow + "," + thisColumn + "))" + '"';
            break;
        default:
            break;
        }

        // Handle new row
        if (thisRow != -1 && thisRow != lastRowNumber) {
            lastColumnNumber = -1;
        }

        // Handle missing column
        if (record instanceof MissingCellDummyRecord) {
            MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
            thisRow = mc.getRow();
            thisColumn = mc.getColumn();
            thisStr = "";
        }

        // If we got something to print out, do so
        if (thisStr != null) {
            appendCell(thisStr);
        }

        // Update column and row count
        if (thisRow > -1)
            lastRowNumber = thisRow;
        if (thisColumn > -1)
            lastColumnNumber = thisColumn;

        // Handle end of row
        if (record instanceof LastCellOfRowDummyRecord) {
            // We're onto a new row
            lastColumnNumber = -1;

            // End the row
            //output.println();
            startNewRow();
        }
    }

    //    static boolean closed = false;
}