no.abmu.common.excel.BaseExcelParserImpl.java Source code

Java tutorial

Introduction

Here is the source code for no.abmu.common.excel.BaseExcelParserImpl.java

Source

/*$Id: BaseExcelParserImpl.java 16075 2011-01-25 08:40:41Z jens $*/
/*
 ****************************************************************************
 *                                                                          *
 *                   (c) Copyright 2005 ABM-utvikling                       *
 *                                                                          *
 * This program is free software; you can redistribute it and/or modify it  *
 * under the terms of the GNU General Public License as published by the    *
 * Free Software Foundation; either version 2 of the License, or (at your   *
 * option) any later version.                                               *
 *                                                                          *
 * This program is distributed in the hope that it will be useful, but      *
 * WITHOUT ANY WARRANTY; without even the implied warranty of               *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General *
 * Public License for more details. http://www.gnu.org/licenses/gpl.html    *
 *                                                                          *
 ****************************************************************************
 */

package no.abmu.common.excel;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.text.DateFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import no.abmu.common.constants.LocaleTypeNameConst;
import no.abmu.util.date.DateUtil;
import no.abmu.util.string.StringUtil;
import no.abmu.util.test.Assert;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
 * Base class for excel parsers, which handles contact with excel-file.
 *
 * @author marius.midtvik@zenior.no
 * @author $Author: jens $
 * @version $Rev: 16075 $
 * @date $Date: 2011-01-25 09:40:41 +0100 (Tue, 25 Jan 2011) $
 * @copyright ABM-Utvikling
 *
 */
public class BaseExcelParserImpl implements BaseExcelParser {

    private final Logger logger = Logger.getLogger(BaseExcelParserImpl.class);

    /**
     * Represents the parsed Excel document.
     */
    private HSSFWorkbook workBook;

    /**
     * 
     * The name of the filename to load the excel document from.
     */
    private String excelFileName;

    /**
     * Maps from column name to column index.
     */
    private HashMap<String, Short> columnMap;

    /**
     * Map from value to row number, usefull when handling data over two sheets:
     * this map can be used on the column having the primary key.
     */
    private HashMap<String, Short> valueToRowMap;

    /**
     * The name of the sheet to handle.
     */
    private String sheetName;

    private HSSFSheet sheet;

    private int currentRowIdx;

    /**
     * The index of the row where the data starts.
     */
    private int dataStartRowIdx;

    /**
     * The row having the header (ie column names).
     */
    private int headerRow;

    /**
     * Secondary header row, for sheets having more than on row of headers.
     */
    private int secondaryHeaderRow = -1;

    public BaseExcelParserImpl() {
    }

    /**
     * Construct the parser based on an exists book, usefull when working with two.
     * sheets at the same time.
     *
     * @param workBook
     */
    public BaseExcelParserImpl(HSSFWorkbook workBook, String sheetName, int headerRow) {
        this.workBook = workBook;
        setHeaderRow(headerRow);
        setSheetName(sheetName);

        buildColumnMap();
        reset();

    }

    public HSSFWorkbook getWorkBook() {
        return workBook;
    }

    /**
     * Loads the document and moves to the first row.
     */
    public void load() {
        loadExcelDocument();
        reset();
    }

    /**
     * Returns the index of the row having the specified value in the indexed column.
     *
     * Ensure that the method updateValueToRowMap is called with the correct column index
     * if the value does not exits in the first column.
     *
     * @param value
     * @return Short value holding the index.
     */
    public Short getRowIdxOfValue(String value) {
        if (valueToRowMap == null) {
            /* Be nice, assume the index is on first column */
            updateValueToRowMap(new Short((short) 0));
        }

        return (Short) valueToRowMap.get(value);
    }

    /**
     * Returns the value of the specified column from the row having the
     * specified indexed value.
     *
     * @param value
     * @param columnName
     * @return
     */
    public String getStringByValueAndColumnName(String value, String columnName) {
        Short rowIndex;
        Short columnIndex;
        HSSFRow row = null;

        rowIndex = getRowIdxOfValue(value);
        if (rowIndex == null) {
            return null;
        }

        row = sheet.getRow(rowIndex.intValue());
        if (row == null) {
            return null;
        }

        columnIndex = (Short) columnMap.get(columnName);
        if (columnIndex == null) {
            throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'");
        }

        return getStringValue(row, columnIndex);

    }

    /**
     * This method updates the valueToRowMap based on the specified column.
     * Note that the method will throw an exception if two identical values are found.
     *
     * @param columnIdx the zero based index of the column to build the map from.
     */
    public void updateValueToRowMap(Short columnIdx) {
        HSSFRow row = null;
        String value;

        for (int rowIdx = 0; rowIdx <= sheet.getLastRowNum(); rowIdx++) {
            row = sheet.getRow(rowIdx);

            if (row == null) {
                continue;
            }

            value = getStringValue(row, columnIdx);

            if (value != null && value.trim().length() > 0) {
                if (valueToRowMap == null) {
                    valueToRowMap = new HashMap<String, Short>();
                }

                valueToRowMap.put(value, new Short((short) rowIdx));
            }
        }

    }

    /**
     * Returns the 1-based row number, ie: the first row has row number 1,
     * not 0 as the index.
     *
     * Use this when providing feedback to users.
     *
     * @return
     */
    public int getCurrentRowNumber() {
        return currentRowIdx + 1;
    }

    public int getCurrentRowIdx() {
        return currentRowIdx;
    }

    /**
     * Sets the row index of the header row.
     * @param headerRow
     */
    public void setHeaderRow(int headerRow) {
        this.headerRow = headerRow;
    }

    /**
     * Moves the row pointer to the first data row.
     */
    public void reset() {
        if (dataStartRowIdx > 0) {
            currentRowIdx = dataStartRowIdx;
        } else if (secondaryHeaderRow >= 0) {
            currentRowIdx = secondaryHeaderRow + 1;

        } else {
            currentRowIdx = headerRow + 1;
        }
    }

    /**
     * Moves the the next row.
     */
    public void next() {
        if (hasNext()) {
            currentRowIdx++;
        } else {
            throw new IllegalStateException("Can't move to next row, no more rows.");
        }

    }

    /**
     * Returns true if there are more rows in the sheet.
     *
     * @return
     */
    public boolean hasNext() {
        return (currentRowIdx <= sheet.getLastRowNum());
    }

    /**
     * Returns the number of rows with data, below the header.
     */
    public int countNumberOfRowsWithData() {
        int rowCount = 0;
        String value;
        Short zeroIdx = new Short((short) 0);

        reset();
        for (; hasNext(); next()) {
            value = getString(zeroIdx);
            if (value != null && value.trim().length() > 0) {
                rowCount++;
            }
        }
        reset();

        return rowCount;
    }

    /**
     * Returns a hashmap of all values having column headers matching the specified value.
     * The column header will be the key.
     *
     * Example :
     *
     * Felt 1 Felt 2
     * 1      2
     *
     * Will give the hashmap:
     * "Felt 1" = "1"
     * "Felt 2" = "2"
     *
     * @param columnHeaderPrefix
     * @return
     */
    public HashMap<String, String> getFieldsMatchingHeaderPrefix(String columnHeaderPrefix) {
        String key;
        String value;

        HashMap<String, String> ret = new HashMap<String, String>();

        for (Iterator<String> iter = columnMap.keySet().iterator(); iter.hasNext();) {
            key = iter.next();

            if (!key.startsWith(columnHeaderPrefix)) {
                continue;
            }

            value = getString(key);
            if (value != null) {
                ret.put(key, value);
            }
        }

        return ret;
    }

    /**
     * Returns true if the sheet has a column with the specified name.
     *
     * @param columnName
     * @return true if the column exits, otherwise false.
     */
    public boolean hasColumn(String columnName) {
        Short columnIdx;

        columnIdx = (Short) columnMap.get(columnName);
        return (columnIdx == null ? false : true);
    }

    /**
     * Returns the value of the specified columns in specified order.
     * If neither of the columns exists, null is returned.
     *
     * @param columnName1
     * @param columnName2
     * @return
     */
    public String getStringOrNull(String columnName1, String columnName2) {
        String value;
        if (hasColumn(columnName1)) {
            value = getString(columnName1);
            if (value != null) {
                return value;
            }
        }

        if (hasColumn(columnName2)) {
            value = getString(columnName2);
            if (value != null) {
                return value;
            }
        }

        return null;
    }

    /**
     * Returns the value of the specified columns in specified order.
     * If neither of the columns exists, null is returned.
     *
     * @param columnName1
     * @return
     */
    public String getStringOrNull(String columnName1) {
        String value;
        if (hasColumn(columnName1)) {
            value = getString(columnName1);
            if (value != null) {
                return value;
            }
        }

        return null;

    }

    /**
     * Returns the value of the column having the specified name for the current row.
     * The value is trimmed.
     *
     * @param columnName
     * @return
     * @throws IllegalArgumentException is there are no column having the specified columnName
     */
    public String getTrimmedString(String columnName) {
        String value;

        value = getString(columnName);
        if (value != null) {
            return value.trim();
        }

        return null;
    }

    /**
     * Returns the value of the column having the specified name for the current row.
     *
     * @param columnName
     * @return
     * @throws IllegalArgumentException is there are no column having the specified columnName
     */
    public String getString(String columnName) {
        HSSFRow row;
        Short columnIdx;

        row = sheet.getRow(currentRowIdx);

        if (row == null) {
            return null;
        }

        columnIdx = (Short) columnMap.get(columnName);
        if (columnIdx == null) {
            throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'");
        }

        return getStringValue(row, columnIdx);
    }

    /**
     * Returns the value of the column having the specified name for the current row.
     *
     * @param columnIdx
     * @return
     */
    public String getString(Short columnIdx) {
        HSSFRow row;

        row = sheet.getRow(currentRowIdx);

        if (row == null) {
            return null;
        }

        return getStringValue(row, columnIdx);
    }

    /**
     * Returns the value of the column having the specified name for the current row.
     *
     * @param columnIdx
     * @return
     */
    public Long getLong(Short columnIdx) {
        HSSFRow row;

        row = sheet.getRow(currentRowIdx);

        if (row == null) {
            return null;
        }

        return getLongValue(row, columnIdx);
    }

    /**
     * Returns the value of the column having the specified name for the current row.
     *
     * @param columnName
     * @return
     */
    public Long getLong(String columnName) {
        HSSFRow row;
        Short columnIdx;

        row = sheet.getRow(currentRowIdx);

        if (row == null) {
            return null;
        }

        columnIdx = (Short) columnMap.get(columnName);
        if (columnIdx == null) {
            throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'");
        }

        return getLongValue(row, columnIdx);
    }

    /**
     * Returns the value of the column having the specified name for the current row.
     *
     * @param columnName
     * @return
     */
    public Double getDouble(String columnName) {
        HSSFRow row;
        Short columnIdx;

        row = sheet.getRow(currentRowIdx);

        if (row == null) {
            return null;
        }

        columnIdx = (Short) columnMap.get(columnName);
        if (columnIdx == null) {
            throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'");
        }

        return getDoubleValue(row, columnIdx);
    }

    /**
     * Returns the value of the column having the specified name for the current row.
     *
     * @param columnName
     * @return
     */
    public Date getDate(String columnName) {
        HSSFRow row;
        Short columnIdx;

        row = sheet.getRow(currentRowIdx);

        if (row == null) {
            return null;
        }

        columnIdx = (Short) columnMap.get(columnName);
        if (columnIdx == null) {
            throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'");
        }

        return getDateValue(row, columnIdx, null);
    }

    /**
     * Returns the value of the column having the specified name for the current row.
     *
     * @param columnName
     * @return
     */
    public Date getDate(String columnName, DateFormat customDateFormat) {
        HSSFRow row;
        Short columnIdx;

        row = sheet.getRow(currentRowIdx);

        if (row == null) {
            return null;
        }

        columnIdx = (Short) columnMap.get(columnName);
        if (columnIdx == null) {
            throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'");
        }

        return getDateValue(row, columnIdx, customDateFormat);
    }

    /**
     * Loads the Excel document.
     */
    protected void loadExcelDocument() {
        File file = null;
        POIFSFileSystem poifsFileSystem = null;
        InputStream inputStream = null;

        if (excelFileName == null) {
            logger.error("Can't parse Excel document. No filename specified");
            throw new IllegalStateException("Can't parse Excel document. No filename specified");
        }

        logger.debug("Will load Excel document having filename '" + excelFileName + "'");
        inputStream = this.getClass().getResourceAsStream(excelFileName);
        if (inputStream == null) {
            file = new File(excelFileName);
            if (!file.exists()) {
                logger.error("Can't load Excel document having filename '" + excelFileName
                        + "'. The file does not exist");
                throw new IllegalArgumentException(
                        "Can't parse Excel document. File " + excelFileName + " does not exist");
            }

            if (!file.canRead()) {
                logger.error("Can't load Excel document having filename '" + excelFileName + "'. No read access.");
                throw new IllegalArgumentException("Can't parse Excel document. No access");
            }
            try {
                inputStream = new FileInputStream(file);
            } catch (FileNotFoundException e) {
                logger.error("Can't load Excel document having filename '" + excelFileName
                        + "'. Failed when reading file : '" + e.getMessage() + "'", e);
                throw new IllegalStateException(
                        "Can't parse Excel document. Failed to read file '" + file + "' " + e);
            }
        }
        try {
            poifsFileSystem = new POIFSFileSystem(inputStream);
        } catch (IOException e) {
            logger.error("Can't load Excel document having filename '" + excelFileName
                    + "'. Failed when reading file : '" + e.getMessage() + "'", e);
            throw new IllegalStateException("Can't parse Excel document. Failed to read file '" + file + "' " + e);
        }

        try {
            workBook = new HSSFWorkbook(poifsFileSystem);
        } catch (IOException e) {
            logger.error("Can't load Excel document having filename '" + excelFileName
                    + "'. Failed when parsing file : '" + e.getMessage() + "'", e);
            throw new IllegalStateException("Can't parse Excel document. Failed to parse Excel data.");
        }

        logger.debug("Successfully loaded and parsed Excel document");

        buildColumnMap();
    }

    /**
     * Builds the map from column name to column index.
     */
    protected void buildColumnMap() {
        HSSFRow row;
        Short shortIdx;
        String columnHeaderName;

        sheet = workBook.getSheet(getSheetName());
        if (sheet == null) {
            logger.error("Can't extract information. The Excel document does not have a sheet with name '"
                    + getSheetName() + "'");
            throw new IllegalArgumentException("Can't extract information, sheet not found");
        }

        row = sheet.getRow(headerRow);
        if (row == null) {
            logger.error("Can't extract information. The Excel document is missing first row=[" + headerRow
                    + "] on sheet with name '" + getSheetName() + "'");
            throw new IllegalArgumentException("Can't extract information, missing row on sheet");
        }

        columnMap = new HashMap<String, Short>();
        for (short colIdx = 0; colIdx < row.getLastCellNum(); colIdx++) {

            shortIdx = new Short(colIdx);
            columnHeaderName = getStringValue(row, shortIdx);
            if (columnHeaderName == null) {
                continue;
            }

            columnHeaderName = columnHeaderName.trim();

            columnMap.put(columnHeaderName, shortIdx);

        }

        if (secondaryHeaderRow >= 0) {
            row = sheet.getRow(secondaryHeaderRow);
            if (row == null) {
                logger.error("Can't extract information. The Excel document is secondary row " + secondaryHeaderRow
                        + " (index) on sheet with name '" + getSheetName() + "'");
                throw new IllegalArgumentException("Can't extract information, missing row on sheet");
            }

            for (short colIdx = 0; colIdx < row.getLastCellNum(); colIdx++) {

                shortIdx = new Short(colIdx);
                columnHeaderName = getStringValue(row, shortIdx);
                if (columnHeaderName == null) {
                    continue;
                }

                columnHeaderName = columnHeaderName.trim();
                /* Make sure duplicated header names from the secondary header row
                shadows the indexes from the primary row by just checking of the headername
                has already been added */
                if (!columnMap.containsKey(columnHeaderName)) {
                    columnMap.put(columnHeaderName, shortIdx);
                }
            }

        }
    }

    public List<String> getListOfColumnHeaderName() {
        HSSFRow row;
        Short shortIdx;
        String columnHeaderName;

        sheet = workBook.getSheet(getSheetName());
        if (sheet == null) {
            logger.error("Can't extract information. The Excel document does not have a sheet with name '"
                    + getSheetName() + "'");
            throw new IllegalArgumentException("Can't extract information, sheet not found");
        }

        row = sheet.getRow(headerRow);
        if (row == null) {
            logger.error("Can't extract information. The Excel document is missing first row on sheet with name '"
                    + getSheetName() + "'");
            throw new IllegalArgumentException("Can't extract information, missing row on sheet");
        }

        List<String> listOfColumnHeaderName = new ArrayList<String>();
        for (short colIdx = 0; colIdx < row.getLastCellNum(); colIdx++) {

            shortIdx = new Short(colIdx);
            columnHeaderName = getStringValue(row, shortIdx);
            if (columnHeaderName == null) {
                columnHeaderName = "";
            } else {
                columnHeaderName = columnHeaderName.trim();
            }
            listOfColumnHeaderName.add(columnHeaderName);
        }

        return listOfColumnHeaderName;
    }

    public String getExcelFileName() {
        return excelFileName;
    }

    /**
     * Sets the filename of the Excel document.
     *
     * @param excelFileName
     */
    public void setExcelFileName(String excelFileName) {
        this.excelFileName = excelFileName;
    }

    /**
     * Returns the value of the specified column as a String.
     *
     * @param row
     * @param columnIndex
     * @return
     */
    protected String getStringValue(HSSFRow row, Short columnIndex) {

        HSSFCell cell;

        if (columnIndex == null) {
            return null;
        }

        cell = row.getCell(columnIndex.shortValue());

        if (cell == null) {
            return null;
        }
        switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_BLANK:
            return null;
        case HSSFCell.CELL_TYPE_NUMERIC:
            //           System.out.println("cell:"+cell.getNumericCellValue()+", %1:"+cell.getNumericCellValue() % 1+", "+(cell.getNumericCellValue() % 1 == 0));
            if (cell.getNumericCellValue() % 1 == 0) {//is integer
                /* For now - convert to long */
                return Long.toString(new Double(cell.getNumericCellValue()).longValue());
            } else {//decimal
                Locale locale = LocaleTypeNameConst.BOKMAAL;
                NumberFormat numberFormat = NumberFormat.getInstance(locale);
                return numberFormat.format(cell.getNumericCellValue());
            }
        case HSSFCell.CELL_TYPE_STRING:
            return cell.getStringCellValue();
        default:
            return null;

        }

    }

    /**
     * Return the value of the cell as a long.
     *
     * @return
     */
    protected Long getLongValue(HSSFRow row, Short columnIndex) {
        String value;
        HSSFCell cell;

        if (columnIndex == null) {
            return null;
        }

        cell = row.getCell(columnIndex.shortValue());

        if (cell == null) {
            return null;
        }

        switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_BLANK:
            return null;
        case HSSFCell.CELL_TYPE_NUMERIC:
            return new Long(new Double(cell.getNumericCellValue()).longValue());
        case HSSFCell.CELL_TYPE_STRING:
            value = cell.getStringCellValue();
            if (value == null) {
                return null;
            } else {
                try {
                    return new Long(value);
                } catch (NumberFormatException nfe) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("NumberFormatException: " + nfe);
                    }
                }
            }
        default:
            return null;
        }
    }

    /**
     * Return the value of the cell as a long.
     *
     * @return
     */
    protected Date getDateValue(HSSFRow row, Short columnIndex, DateFormat customDateFormat) {
        Assert.checkRequiredArgument("row", row);
        Assert.checkRequiredArgument("columnIndex", columnIndex);

        HSSFCell cell = row.getCell(columnIndex.shortValue());

        if (cell == null) {
            return null;
        }

        switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_NUMERIC:
            if (HSSFDateUtil.isValidExcelDate(cell.getNumericCellValue())) {
                return cell.getDateCellValue();
            } else {
                return null;
            }
        case HSSFCell.CELL_TYPE_STRING:
            String value = cell.getStringCellValue();
            if (StringUtil.isEmpty(value)) {
                return null;
            }
            // ISO data format are default date format
            DateFormat dateFormat = DateUtil.ISO_DATEFORMAT;
            if (customDateFormat != null) {
                dateFormat = customDateFormat;
            }
            Date returnValue = null;
            try {
                returnValue = DateUtil.parseDateString(dateFormat, value);
            } catch (IllegalArgumentException e) {
                if (logger.isDebugEnabled()) {
                    logger.debug("[getDateValue] IllegalArgumentException: " + e);
                }
            }
            return returnValue;

        default:
            return null;
        }

    }

    /**
     * Return the value of the cell as a long.
     *
     * @return
     */
    protected Double getDoubleValue(HSSFRow row, Short columnIndex) {
        Assert.checkRequiredArgument("row", row);
        Assert.checkRequiredArgument("columnIndex", columnIndex);

        HSSFCell cell = row.getCell(columnIndex.shortValue());

        if (cell == null) {
            return null;
        }

        switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_BLANK:
            return null;
        case HSSFCell.CELL_TYPE_NUMERIC:
            return cell.getNumericCellValue();
        case HSSFCell.CELL_TYPE_STRING:
            String value = cell.getStringCellValue();
            if (StringUtil.isEmpty(value)) {
                return null;
            } else {
                Double returnValue = null;
                try {
                    returnValue = new Double(value);
                } catch (NumberFormatException nfe) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("NumberFormatException: " + nfe);
                    }
                }
                return returnValue;
            }
        default:
            return null;
        }
    }

    public String getSheetName() {
        return sheetName;
    }

    public void setSheetName(String sheetName) {
        this.sheetName = sheetName;
    }

    /**
     * Returns the index of the first row having data.
     * @return
     */
    public int getDataStartRowIdx() {
        return dataStartRowIdx;
    }

    /**
     * Sets the index of the first row having data.
     * Please be aware that this will reset the counters - ie move the
     * current row to dataStartRowIdx
     *
     * @param dataStartRowIdx the index
     */
    public void setDataStartRowIdx(int dataStartRowIdx) {
        this.dataStartRowIdx = dataStartRowIdx;
        reset();
    }

    public void setSecondaryHeaderRow(int secondaryHeaderRow) {
        this.secondaryHeaderRow = secondaryHeaderRow;
    }

}