Java tutorial
/* * ==================================================================== * * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. * * ==================================================================== */ package com.vaadin.addon.tableexport; import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; import org.apache.poi.hssf.model.HSSFFormulaParser; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BlankRecord; import org.apache.poi.hssf.record.BoolErrRecord; import org.apache.poi.hssf.record.BoundSheetRecord; import org.apache.poi.hssf.record.FormulaRecord; import org.apache.poi.hssf.record.LabelRecord; import org.apache.poi.hssf.record.LabelSSTRecord; import org.apache.poi.hssf.record.NoteRecord; import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.RKRecord; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.hssf.record.StringRecord; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.io.Serializable; import java.util.ArrayList; /** * A XLS -> CSV processor, that uses the MissingRecordAware EventModel code to ensure it outputs all * columns and rows. * * @author Nick Burch */ public class XLS2CSVmra implements HSSFListener, Serializable { private int minColumns; private POIFSFileSystem fs; private PrintStream output; private int lastRowNumber; private int lastColumnNumber; /** Should we output the formula, or the value it has? */ private boolean outputFormulaValues = true; /** For parsing Formulas */ private SheetRecordCollectingListener workbookBuildingListener; private HSSFWorkbook stubWorkbook; // Records we pick up as we process private SSTRecord sstRecord; private FormatTrackingHSSFListener formatListener; /** So we known which sheet we're on */ @SuppressWarnings("unused") private int sheetIndex = -1; private BoundSheetRecord[] orderedBSRs; @SuppressWarnings("rawtypes") private ArrayList boundSheetRecords = new ArrayList(); // For handling formulas with string results private int nextRow; private int nextColumn; private boolean outputNextStringRecord; /** * Creates a new XLS -> CSV converter * * @param fs * The POIFSFileSystem to process * @param output * The PrintStream to output the CSV to * @param minColumns * The minimum number of columns to output, or -1 for no minimum */ public XLS2CSVmra(final POIFSFileSystem fs, final PrintStream output, final int minColumns) { this.fs = fs; this.output = output; this.minColumns = minColumns; } /** * Creates a new XLS -> CSV converter * * @param filename * The file to process * @param minColumns * The minimum number of columns to output, or -1 for no minimum * @throws IOException * @throws FileNotFoundException */ public XLS2CSVmra(final String filename, final int minColumns) throws IOException, FileNotFoundException { this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns); } /** * Initiates the processing of the XLS file to CSV */ public void process() throws IOException { final MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); formatListener = new FormatTrackingHSSFListener(listener); final HSSFEventFactory factory = new HSSFEventFactory(); final HSSFRequest request = new HSSFRequest(); if (outputFormulaValues) { request.addListenerForAllRecords(formatListener); } else { workbookBuildingListener = new SheetRecordCollectingListener(formatListener); request.addListenerForAllRecords(workbookBuildingListener); } factory.processWorkbookEvents(request, fs); } /** * Main HSSFListener method, processes events, and outputs the CSV as the file is processed. */ @SuppressWarnings("unchecked") @Override public void processRecord(final Record record) { int thisRow = -1; int thisColumn = -1; String thisStr = null; switch (record.getSid()) { case BoundSheetRecord.sid: boundSheetRecords.add(record); break; case BOFRecord.sid: final BOFRecord br = (BOFRecord) record; if (br.getType() == BOFRecord.TYPE_WORKSHEET) { // Create sub workbook if required if ((workbookBuildingListener != null) && (stubWorkbook == null)) { stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook(); } // Output the worksheet name // Works by ordering the BSRs by the location of // their BOFRecords, and then knowing that we // process BOFRecords in byte offset order sheetIndex++; if (orderedBSRs == null) { orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords); } // uncomment the below if you want to output the worksheet name // output.println(); // output.println(orderedBSRs[sheetIndex].getSheetname() + " [" + (sheetIndex + // 1) // + "]:"); } break; case SSTRecord.sid: sstRecord = (SSTRecord) record; break; case BlankRecord.sid: final BlankRecord brec = (BlankRecord) record; thisRow = brec.getRow(); thisColumn = brec.getColumn(); thisStr = ""; break; case BoolErrRecord.sid: final BoolErrRecord berec = (BoolErrRecord) record; thisRow = berec.getRow(); thisColumn = berec.getColumn(); thisStr = ""; break; case FormulaRecord.sid: final FormulaRecord frec = (FormulaRecord) record; thisRow = frec.getRow(); thisColumn = frec.getColumn(); if (outputFormulaValues) { if (Double.isNaN(frec.getValue())) { // Formula result is a string // This is stored in the next record outputNextStringRecord = true; nextRow = frec.getRow(); nextColumn = frec.getColumn(); } else { thisStr = formatListener.formatNumberDateCell(frec); } } else { thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"'; } break; case StringRecord.sid: if (outputNextStringRecord) { // String for formula final StringRecord srec = (StringRecord) record; thisStr = srec.getString(); thisRow = nextRow; thisColumn = nextColumn; outputNextStringRecord = false; } break; case LabelRecord.sid: final LabelRecord lrec = (LabelRecord) record; thisRow = lrec.getRow(); thisColumn = lrec.getColumn(); thisStr = '"' + lrec.getValue() + '"'; break; case LabelSSTRecord.sid: final LabelSSTRecord lsrec = (LabelSSTRecord) record; thisRow = lsrec.getRow(); thisColumn = lsrec.getColumn(); if (sstRecord == null) { thisStr = '"' + "(No SST Record, can't identify string)" + '"'; } else { thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"'; } break; case NoteRecord.sid: final NoteRecord nrec = (NoteRecord) record; thisRow = nrec.getRow(); thisColumn = nrec.getColumn(); // TODO: Find object to match nrec.getShapeId() thisStr = '"' + "(TODO)" + '"'; break; case NumberRecord.sid: final NumberRecord numrec = (NumberRecord) record; thisRow = numrec.getRow(); thisColumn = numrec.getColumn(); // Format thisStr = formatListener.formatNumberDateCell(numrec); break; case RKRecord.sid: final RKRecord rkrec = (RKRecord) record; thisRow = rkrec.getRow(); thisColumn = rkrec.getColumn(); thisStr = '"' + "(TODO)" + '"'; break; default: break; } // Handle new row if ((thisRow != -1) && (thisRow != lastRowNumber)) { lastColumnNumber = -1; } // Handle missing column if (record instanceof MissingCellDummyRecord) { final MissingCellDummyRecord mc = (MissingCellDummyRecord) record; thisRow = mc.getRow(); thisColumn = mc.getColumn(); thisStr = ""; } // If we got something to print out, do so if (thisStr != null) { if (thisColumn > 0) { output.print(','); } output.print(thisStr); } // Update column and row count if (thisRow > -1) lastRowNumber = thisRow; if (thisColumn > -1) lastColumnNumber = thisColumn; // Handle end of row if (record instanceof LastCellOfRowDummyRecord) { // Print out any missing commas if needed if (minColumns > 0) { // Columns are 0 based if (lastColumnNumber == -1) { lastColumnNumber = 0; } for (int i = lastColumnNumber; i < (minColumns); i++) { output.print(','); } } // We're onto a new row lastColumnNumber = -1; // End the row output.println(); } } public static void main(final String[] args) throws Exception { if (args.length < 1) { System.err.println("Use:"); System.err.println(" XLS2CSVmra <xls file> [min columns]"); System.exit(1); } int minColumns = -1; if (args.length >= 2) { minColumns = Integer.parseInt(args[1]); } final XLS2CSVmra xls2csv = new XLS2CSVmra(args[0], minColumns); xls2csv.process(); } }