Java tutorial
/*$Id: BaseExcelParserImpl.java 16075 2011-01-25 08:40:41Z jens $*/ /* **************************************************************************** * * * (c) Copyright 2005 ABM-utvikling * * * * This program is free software; you can redistribute it and/or modify it * * under the terms of the GNU General Public License as published by the * * Free Software Foundation; either version 2 of the License, or (at your * * option) any later version. * * * * This program is distributed in the hope that it will be useful, but * * WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General * * Public License for more details. http://www.gnu.org/licenses/gpl.html * * * **************************************************************************** */ package no.abmu.common.excel; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; import java.text.DateFormat; import java.text.NumberFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Locale; import no.abmu.common.constants.LocaleTypeNameConst; import no.abmu.util.date.DateUtil; import no.abmu.util.string.StringUtil; import no.abmu.util.test.Assert; import org.apache.log4j.Logger; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * Base class for excel parsers, which handles contact with excel-file. * * @author marius.midtvik@zenior.no * @author $Author: jens $ * @version $Rev: 16075 $ * @date $Date: 2011-01-25 09:40:41 +0100 (Tue, 25 Jan 2011) $ * @copyright ABM-Utvikling * */ public class BaseExcelParserImpl implements BaseExcelParser { private final Logger logger = Logger.getLogger(BaseExcelParserImpl.class); /** * Represents the parsed Excel document. */ private HSSFWorkbook workBook; /** * * The name of the filename to load the excel document from. */ private String excelFileName; /** * Maps from column name to column index. */ private HashMap<String, Short> columnMap; /** * Map from value to row number, usefull when handling data over two sheets: * this map can be used on the column having the primary key. */ private HashMap<String, Short> valueToRowMap; /** * The name of the sheet to handle. */ private String sheetName; private HSSFSheet sheet; private int currentRowIdx; /** * The index of the row where the data starts. */ private int dataStartRowIdx; /** * The row having the header (ie column names). */ private int headerRow; /** * Secondary header row, for sheets having more than on row of headers. */ private int secondaryHeaderRow = -1; public BaseExcelParserImpl() { } /** * Construct the parser based on an exists book, usefull when working with two. * sheets at the same time. * * @param workBook */ public BaseExcelParserImpl(HSSFWorkbook workBook, String sheetName, int headerRow) { this.workBook = workBook; setHeaderRow(headerRow); setSheetName(sheetName); buildColumnMap(); reset(); } public HSSFWorkbook getWorkBook() { return workBook; } /** * Loads the document and moves to the first row. */ public void load() { loadExcelDocument(); reset(); } /** * Returns the index of the row having the specified value in the indexed column. * * Ensure that the method updateValueToRowMap is called with the correct column index * if the value does not exits in the first column. * * @param value * @return Short value holding the index. */ public Short getRowIdxOfValue(String value) { if (valueToRowMap == null) { /* Be nice, assume the index is on first column */ updateValueToRowMap(new Short((short) 0)); } return (Short) valueToRowMap.get(value); } /** * Returns the value of the specified column from the row having the * specified indexed value. * * @param value * @param columnName * @return */ public String getStringByValueAndColumnName(String value, String columnName) { Short rowIndex; Short columnIndex; HSSFRow row = null; rowIndex = getRowIdxOfValue(value); if (rowIndex == null) { return null; } row = sheet.getRow(rowIndex.intValue()); if (row == null) { return null; } columnIndex = (Short) columnMap.get(columnName); if (columnIndex == null) { throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'"); } return getStringValue(row, columnIndex); } /** * This method updates the valueToRowMap based on the specified column. * Note that the method will throw an exception if two identical values are found. * * @param columnIdx the zero based index of the column to build the map from. */ public void updateValueToRowMap(Short columnIdx) { HSSFRow row = null; String value; for (int rowIdx = 0; rowIdx <= sheet.getLastRowNum(); rowIdx++) { row = sheet.getRow(rowIdx); if (row == null) { continue; } value = getStringValue(row, columnIdx); if (value != null && value.trim().length() > 0) { if (valueToRowMap == null) { valueToRowMap = new HashMap<String, Short>(); } valueToRowMap.put(value, new Short((short) rowIdx)); } } } /** * Returns the 1-based row number, ie: the first row has row number 1, * not 0 as the index. * * Use this when providing feedback to users. * * @return */ public int getCurrentRowNumber() { return currentRowIdx + 1; } public int getCurrentRowIdx() { return currentRowIdx; } /** * Sets the row index of the header row. * @param headerRow */ public void setHeaderRow(int headerRow) { this.headerRow = headerRow; } /** * Moves the row pointer to the first data row. */ public void reset() { if (dataStartRowIdx > 0) { currentRowIdx = dataStartRowIdx; } else if (secondaryHeaderRow >= 0) { currentRowIdx = secondaryHeaderRow + 1; } else { currentRowIdx = headerRow + 1; } } /** * Moves the the next row. */ public void next() { if (hasNext()) { currentRowIdx++; } else { throw new IllegalStateException("Can't move to next row, no more rows."); } } /** * Returns true if there are more rows in the sheet. * * @return */ public boolean hasNext() { return (currentRowIdx <= sheet.getLastRowNum()); } /** * Returns the number of rows with data, below the header. */ public int countNumberOfRowsWithData() { int rowCount = 0; String value; Short zeroIdx = new Short((short) 0); reset(); for (; hasNext(); next()) { value = getString(zeroIdx); if (value != null && value.trim().length() > 0) { rowCount++; } } reset(); return rowCount; } /** * Returns a hashmap of all values having column headers matching the specified value. * The column header will be the key. * * Example : * * Felt 1 Felt 2 * 1 2 * * Will give the hashmap: * "Felt 1" = "1" * "Felt 2" = "2" * * @param columnHeaderPrefix * @return */ public HashMap<String, String> getFieldsMatchingHeaderPrefix(String columnHeaderPrefix) { String key; String value; HashMap<String, String> ret = new HashMap<String, String>(); for (Iterator<String> iter = columnMap.keySet().iterator(); iter.hasNext();) { key = iter.next(); if (!key.startsWith(columnHeaderPrefix)) { continue; } value = getString(key); if (value != null) { ret.put(key, value); } } return ret; } /** * Returns true if the sheet has a column with the specified name. * * @param columnName * @return true if the column exits, otherwise false. */ public boolean hasColumn(String columnName) { Short columnIdx; columnIdx = (Short) columnMap.get(columnName); return (columnIdx == null ? false : true); } /** * Returns the value of the specified columns in specified order. * If neither of the columns exists, null is returned. * * @param columnName1 * @param columnName2 * @return */ public String getStringOrNull(String columnName1, String columnName2) { String value; if (hasColumn(columnName1)) { value = getString(columnName1); if (value != null) { return value; } } if (hasColumn(columnName2)) { value = getString(columnName2); if (value != null) { return value; } } return null; } /** * Returns the value of the specified columns in specified order. * If neither of the columns exists, null is returned. * * @param columnName1 * @return */ public String getStringOrNull(String columnName1) { String value; if (hasColumn(columnName1)) { value = getString(columnName1); if (value != null) { return value; } } return null; } /** * Returns the value of the column having the specified name for the current row. * The value is trimmed. * * @param columnName * @return * @throws IllegalArgumentException is there are no column having the specified columnName */ public String getTrimmedString(String columnName) { String value; value = getString(columnName); if (value != null) { return value.trim(); } return null; } /** * Returns the value of the column having the specified name for the current row. * * @param columnName * @return * @throws IllegalArgumentException is there are no column having the specified columnName */ public String getString(String columnName) { HSSFRow row; Short columnIdx; row = sheet.getRow(currentRowIdx); if (row == null) { return null; } columnIdx = (Short) columnMap.get(columnName); if (columnIdx == null) { throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'"); } return getStringValue(row, columnIdx); } /** * Returns the value of the column having the specified name for the current row. * * @param columnIdx * @return */ public String getString(Short columnIdx) { HSSFRow row; row = sheet.getRow(currentRowIdx); if (row == null) { return null; } return getStringValue(row, columnIdx); } /** * Returns the value of the column having the specified name for the current row. * * @param columnIdx * @return */ public Long getLong(Short columnIdx) { HSSFRow row; row = sheet.getRow(currentRowIdx); if (row == null) { return null; } return getLongValue(row, columnIdx); } /** * Returns the value of the column having the specified name for the current row. * * @param columnName * @return */ public Long getLong(String columnName) { HSSFRow row; Short columnIdx; row = sheet.getRow(currentRowIdx); if (row == null) { return null; } columnIdx = (Short) columnMap.get(columnName); if (columnIdx == null) { throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'"); } return getLongValue(row, columnIdx); } /** * Returns the value of the column having the specified name for the current row. * * @param columnName * @return */ public Double getDouble(String columnName) { HSSFRow row; Short columnIdx; row = sheet.getRow(currentRowIdx); if (row == null) { return null; } columnIdx = (Short) columnMap.get(columnName); if (columnIdx == null) { throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'"); } return getDoubleValue(row, columnIdx); } /** * Returns the value of the column having the specified name for the current row. * * @param columnName * @return */ public Date getDate(String columnName) { HSSFRow row; Short columnIdx; row = sheet.getRow(currentRowIdx); if (row == null) { return null; } columnIdx = (Short) columnMap.get(columnName); if (columnIdx == null) { throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'"); } return getDateValue(row, columnIdx, null); } /** * Returns the value of the column having the specified name for the current row. * * @param columnName * @return */ public Date getDate(String columnName, DateFormat customDateFormat) { HSSFRow row; Short columnIdx; row = sheet.getRow(currentRowIdx); if (row == null) { return null; } columnIdx = (Short) columnMap.get(columnName); if (columnIdx == null) { throw new IllegalArgumentException("The sheet does not have a column with name '" + columnName + "'"); } return getDateValue(row, columnIdx, customDateFormat); } /** * Loads the Excel document. */ protected void loadExcelDocument() { File file = null; POIFSFileSystem poifsFileSystem = null; InputStream inputStream = null; if (excelFileName == null) { logger.error("Can't parse Excel document. No filename specified"); throw new IllegalStateException("Can't parse Excel document. No filename specified"); } logger.debug("Will load Excel document having filename '" + excelFileName + "'"); inputStream = this.getClass().getResourceAsStream(excelFileName); if (inputStream == null) { file = new File(excelFileName); if (!file.exists()) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. The file does not exist"); throw new IllegalArgumentException( "Can't parse Excel document. File " + excelFileName + " does not exist"); } if (!file.canRead()) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. No read access."); throw new IllegalArgumentException("Can't parse Excel document. No access"); } try { inputStream = new FileInputStream(file); } catch (FileNotFoundException e) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. Failed when reading file : '" + e.getMessage() + "'", e); throw new IllegalStateException( "Can't parse Excel document. Failed to read file '" + file + "' " + e); } } try { poifsFileSystem = new POIFSFileSystem(inputStream); } catch (IOException e) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. Failed when reading file : '" + e.getMessage() + "'", e); throw new IllegalStateException("Can't parse Excel document. Failed to read file '" + file + "' " + e); } try { workBook = new HSSFWorkbook(poifsFileSystem); } catch (IOException e) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. Failed when parsing file : '" + e.getMessage() + "'", e); throw new IllegalStateException("Can't parse Excel document. Failed to parse Excel data."); } logger.debug("Successfully loaded and parsed Excel document"); buildColumnMap(); } /** * Builds the map from column name to column index. */ protected void buildColumnMap() { HSSFRow row; Short shortIdx; String columnHeaderName; sheet = workBook.getSheet(getSheetName()); if (sheet == null) { logger.error("Can't extract information. The Excel document does not have a sheet with name '" + getSheetName() + "'"); throw new IllegalArgumentException("Can't extract information, sheet not found"); } row = sheet.getRow(headerRow); if (row == null) { logger.error("Can't extract information. The Excel document is missing first row=[" + headerRow + "] on sheet with name '" + getSheetName() + "'"); throw new IllegalArgumentException("Can't extract information, missing row on sheet"); } columnMap = new HashMap<String, Short>(); for (short colIdx = 0; colIdx < row.getLastCellNum(); colIdx++) { shortIdx = new Short(colIdx); columnHeaderName = getStringValue(row, shortIdx); if (columnHeaderName == null) { continue; } columnHeaderName = columnHeaderName.trim(); columnMap.put(columnHeaderName, shortIdx); } if (secondaryHeaderRow >= 0) { row = sheet.getRow(secondaryHeaderRow); if (row == null) { logger.error("Can't extract information. The Excel document is secondary row " + secondaryHeaderRow + " (index) on sheet with name '" + getSheetName() + "'"); throw new IllegalArgumentException("Can't extract information, missing row on sheet"); } for (short colIdx = 0; colIdx < row.getLastCellNum(); colIdx++) { shortIdx = new Short(colIdx); columnHeaderName = getStringValue(row, shortIdx); if (columnHeaderName == null) { continue; } columnHeaderName = columnHeaderName.trim(); /* Make sure duplicated header names from the secondary header row shadows the indexes from the primary row by just checking of the headername has already been added */ if (!columnMap.containsKey(columnHeaderName)) { columnMap.put(columnHeaderName, shortIdx); } } } } public List<String> getListOfColumnHeaderName() { HSSFRow row; Short shortIdx; String columnHeaderName; sheet = workBook.getSheet(getSheetName()); if (sheet == null) { logger.error("Can't extract information. The Excel document does not have a sheet with name '" + getSheetName() + "'"); throw new IllegalArgumentException("Can't extract information, sheet not found"); } row = sheet.getRow(headerRow); if (row == null) { logger.error("Can't extract information. The Excel document is missing first row on sheet with name '" + getSheetName() + "'"); throw new IllegalArgumentException("Can't extract information, missing row on sheet"); } List<String> listOfColumnHeaderName = new ArrayList<String>(); for (short colIdx = 0; colIdx < row.getLastCellNum(); colIdx++) { shortIdx = new Short(colIdx); columnHeaderName = getStringValue(row, shortIdx); if (columnHeaderName == null) { columnHeaderName = ""; } else { columnHeaderName = columnHeaderName.trim(); } listOfColumnHeaderName.add(columnHeaderName); } return listOfColumnHeaderName; } public String getExcelFileName() { return excelFileName; } /** * Sets the filename of the Excel document. * * @param excelFileName */ public void setExcelFileName(String excelFileName) { this.excelFileName = excelFileName; } /** * Returns the value of the specified column as a String. * * @param row * @param columnIndex * @return */ protected String getStringValue(HSSFRow row, Short columnIndex) { HSSFCell cell; if (columnIndex == null) { return null; } cell = row.getCell(columnIndex.shortValue()); if (cell == null) { return null; } switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_BLANK: return null; case HSSFCell.CELL_TYPE_NUMERIC: // System.out.println("cell:"+cell.getNumericCellValue()+", %1:"+cell.getNumericCellValue() % 1+", "+(cell.getNumericCellValue() % 1 == 0)); if (cell.getNumericCellValue() % 1 == 0) {//is integer /* For now - convert to long */ return Long.toString(new Double(cell.getNumericCellValue()).longValue()); } else {//decimal Locale locale = LocaleTypeNameConst.BOKMAAL; NumberFormat numberFormat = NumberFormat.getInstance(locale); return numberFormat.format(cell.getNumericCellValue()); } case HSSFCell.CELL_TYPE_STRING: return cell.getStringCellValue(); default: return null; } } /** * Return the value of the cell as a long. * * @return */ protected Long getLongValue(HSSFRow row, Short columnIndex) { String value; HSSFCell cell; if (columnIndex == null) { return null; } cell = row.getCell(columnIndex.shortValue()); if (cell == null) { return null; } switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_BLANK: return null; case HSSFCell.CELL_TYPE_NUMERIC: return new Long(new Double(cell.getNumericCellValue()).longValue()); case HSSFCell.CELL_TYPE_STRING: value = cell.getStringCellValue(); if (value == null) { return null; } else { try { return new Long(value); } catch (NumberFormatException nfe) { if (logger.isDebugEnabled()) { logger.debug("NumberFormatException: " + nfe); } } } default: return null; } } /** * Return the value of the cell as a long. * * @return */ protected Date getDateValue(HSSFRow row, Short columnIndex, DateFormat customDateFormat) { Assert.checkRequiredArgument("row", row); Assert.checkRequiredArgument("columnIndex", columnIndex); HSSFCell cell = row.getCell(columnIndex.shortValue()); if (cell == null) { return null; } switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_NUMERIC: if (HSSFDateUtil.isValidExcelDate(cell.getNumericCellValue())) { return cell.getDateCellValue(); } else { return null; } case HSSFCell.CELL_TYPE_STRING: String value = cell.getStringCellValue(); if (StringUtil.isEmpty(value)) { return null; } // ISO data format are default date format DateFormat dateFormat = DateUtil.ISO_DATEFORMAT; if (customDateFormat != null) { dateFormat = customDateFormat; } Date returnValue = null; try { returnValue = DateUtil.parseDateString(dateFormat, value); } catch (IllegalArgumentException e) { if (logger.isDebugEnabled()) { logger.debug("[getDateValue] IllegalArgumentException: " + e); } } return returnValue; default: return null; } } /** * Return the value of the cell as a long. * * @return */ protected Double getDoubleValue(HSSFRow row, Short columnIndex) { Assert.checkRequiredArgument("row", row); Assert.checkRequiredArgument("columnIndex", columnIndex); HSSFCell cell = row.getCell(columnIndex.shortValue()); if (cell == null) { return null; } switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_BLANK: return null; case HSSFCell.CELL_TYPE_NUMERIC: return cell.getNumericCellValue(); case HSSFCell.CELL_TYPE_STRING: String value = cell.getStringCellValue(); if (StringUtil.isEmpty(value)) { return null; } else { Double returnValue = null; try { returnValue = new Double(value); } catch (NumberFormatException nfe) { if (logger.isDebugEnabled()) { logger.debug("NumberFormatException: " + nfe); } } return returnValue; } default: return null; } } public String getSheetName() { return sheetName; } public void setSheetName(String sheetName) { this.sheetName = sheetName; } /** * Returns the index of the first row having data. * @return */ public int getDataStartRowIdx() { return dataStartRowIdx; } /** * Sets the index of the first row having data. * Please be aware that this will reset the counters - ie move the * current row to dataStartRowIdx * * @param dataStartRowIdx the index */ public void setDataStartRowIdx(int dataStartRowIdx) { this.dataStartRowIdx = dataStartRowIdx; reset(); } public void setSecondaryHeaderRow(int secondaryHeaderRow) { this.secondaryHeaderRow = secondaryHeaderRow; } }