Java tutorial
/*$Id: ExcelWithLibraryInformationAndDataParser.java 12167 2008-12-08 10:37:10Z jens $*/ /* **************************************************************************** * * * (c) Copyright 2004 ABM-utvikling * * * * This program is free software; you can redistribute it and/or modify it * * under the terms of the GNU General Public License as published by the * * Free Software Foundation; either version 2 of the License, or (at your * * option) any later version. * * * * This program is distributed in the hope that it will be useful, but * * WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General * * Public License for more details. http://www.gnu.org/licenses/gpl.html * * * **************************************************************************** */ package no.abmu.abmstatistikk.annualstatistic.util; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import no.abmu.abmstatistikk.annualstatistic.domain.Answer; import no.abmu.abmstatistikk.annualstatistic.domain.Report; import no.abmu.abmstatistikk.annualstatistic.service.AnnualStatisticService; import no.abmu.organisationregister.domain.OrganisationTypeNameConst; import no.abmu.organisationregister.domain.OrganisationUnit; import no.abmu.organisationregister.service.OrganisationUnitService; import org.apache.log4j.Logger; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * This class parses Excel spreadsheets with library information. * The main purpose of the class is to populate the organisation register * for testing and for production. * * @author Andreas Dahl andread@linpro.no * @author $Author: jens $ * @version $Rev: 12167 $ * @date $Date: 2008-12-08 11:37:10 +0100 (Mon, 08 Dec 2008) $ * @copyright ABM-Utvikling */ public class ExcelWithLibraryInformationAndDataParser { private final Logger logger = Logger.getLogger(ExcelWithLibraryInformationAndDataParser.class); private AnnualStatisticService asService; /** * Represents the parsed Excel document. */ private HSSFWorkbook workBook; /** * The name of the filename to load the excel document from. */ private String excelFileName; /** * The default language code to use. */ private String defaultLanguageCode = "NO"; /** * The name of the sheet holding the data. */ private String sheetName = "Alt"; /** * The index of the column holding the organisationUnitId. */ private Short organisationUnitIdColumnIdx = new Short((short) 0); /** * The index of the column holding the organisation name. */ private Short organisactionNameColumnIdx = new Short((short) 2); /** * The index of the column holding the address. */ private Short addressColumnIdx = new Short((short) 3); /** * The index of the column holding the postal code number. */ private Short postCodeNumberColumnIdx = new Short((short) 4); /** * The index of the column holding the postal code name. */ private Short postCodeNameColumnIdx = new Short((short) 5); /** * The index of the column holding the library code. */ private Short libraryCodeColumnIdx = new Short((short) 6); /** * The code of the language of the data to be parsed, for example nor. */ private String languageCode = "nor"; /** * The name of the language of the data to be parsed, for example Bokml. */ private String languageName = "Bokml"; /** * The code of the country to import data to. */ private String countryCode = "NO"; /** * The name of the country to import data to. */ private String countryName = "Norge"; /** * The collection of libraryInformation extracted. */ private ArrayList libraryInformationList; /** * The collection of converted organisational units. */ private ArrayList organisationalUnits; /** * This map should hold the key - value pairs of the organistion id's and names. * * For example: * key value * 1 School library * 2 University library * ... */ private Map organisationTypeNameMap; /** * The default name to use for an organization when the code is not found in the * map organisationTypeNameMap. * @see organisationTypeNameMap */ private String defaultOrganisationTypeName = "Unknown"; /** * The "feltnr" => "columnnr" map. */ private Map feltMap = new HashMap(); private OrganisationUnitService organisationUnitService; private String organisationType; private String schemaShortName; private int fieldLine = 0; private int firstDataLine; /** * No-arg constructor. */ public ExcelWithLibraryInformationAndDataParser() { } /** * Does the actual conversion. * * 1. Find and load the spreadsheet<br/> * 2. Extract information.<br/> * 3. Convert into domain objects.<br/> */ protected void loadAndConvert() { loadExcelDocument(); extractLibraryInformation(); convertToDomainObjects(); } /** * Iterates the rows in the Spreadsheet data and builds up the * LibraryInformation instances. */ protected void extractLibraryInformation() { HSSFSheet sheet; HSSFRow row; LibraryInformation libraryInformation; String organisationUnitId; libraryInformationList = new ArrayList(); sheet = workBook.getSheet(getSheetName()); if (sheet == null) { logger.error("Can't extract information. The Excel document does not have a sheet with name '" + getSheetName() + "'"); throw new IllegalArgumentException("Can't extract information, sheet not found"); } logger.info("Reading " + sheet.getLastRowNum() + " rows."); for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) { row = sheet.getRow(rowIndex); if (row == null) { /* No more rows */ break; } libraryInformation = new LibraryInformation(); organisationUnitId = getStringValue(row, getOrganisationUnitIdColumnIdx()); logger.debug("No OU on line " + rowIndex); if (rowIndex == fieldLine) { // Building "coloumn nr" => "Felt nr" hash; short max = row.getLastCellNum(); for (short i = 0; i < max; i++) { HSSFCell x = row.getCell(i); if (x != null) { String cellString = getStringValue(row, new Short(i)); if (cellString != null) { Pattern p = Pattern.compile("^(Felt)?\\s*(\\d+)$"); Matcher m = p.matcher(cellString); if (m.matches()) { String fieldNumber = m.group(2); if (fieldNumber.length() == 1) { fieldNumber = "00" + fieldNumber; } else if (fieldNumber.length() == 2) { fieldNumber = "0" + fieldNumber; } logger.info("Found field " + fieldNumber); feltMap.put(fieldNumber, new Short(i)); } } } } continue; } if (rowIndex < firstDataLine) { continue; } libraryInformation.setOrganisationUnitId(organisationUnitId); /* Scan all "Felt" fields */ Map resultMap = new HashMap(); Set keys = feltMap.keySet(); Iterator it = keys.iterator(); String key = null; while (it.hasNext()) { key = (String) (it.next()); String val = getStringValue(row, (Short) feltMap.get(key)); resultMap.put(key, val); } libraryInformation.addResultMap(resultMap); logger.debug("Adding resultmap: " + resultMap); if (organisationType.equals("Fagbibliotek")) { Long orgTypeNumber = getLongValue(row, new Short((short) 11)); String fagBibType = null; if (orgTypeNumber != null) { switch (orgTypeNumber.intValue()) { case 0: fagBibType = OrganisationTypeNameConst.NATIONAL_LIBRARY; break; case 1: fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_OSLO; break; case 2: fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_BERGEN; break; case 3: fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_TRONDHEIM; break; case 4: fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_TROMSOE; break; case 5: fagBibType = OrganisationTypeNameConst.SPECIALISED_UNIVERISTY_LIBRARY; break; case 6: fagBibType = OrganisationTypeNameConst.PUBLIC_COLLEGE_LIBRARY; break; case 7: fagBibType = OrganisationTypeNameConst.PRIVATE_COLLEGE_LIBRARY; break; case 8: fagBibType = OrganisationTypeNameConst.PUBLIC_SPECIAL_LIBRARY; break; case 9: fagBibType = OrganisationTypeNameConst.PRIVATE_SPECIAL_LIBRARY; break; default: fagBibType = OrganisationTypeNameConst.FAG_LIBRARY; break; } } libraryInformation.organisationType = fagBibType; } else if (organisationType.equals("Folkebibliotek")) { if (organisationUnitId != null && organisationUnitId.equals("456")) { libraryInformation.organisationType = "Folkebibliotek Svalbard"; } else { libraryInformation.organisationType = organisationType; } } else if (organisationType.equals("Grunnskolebibliotek")) { if (organisationUnitId != null && organisationUnitId.equals("3580")) { libraryInformation.organisationType = "Grunnskolebibliotek Svalbard"; } else { libraryInformation.organisationType = organisationType; } } else { libraryInformation.organisationType = organisationType; } libraryInformationList.add(libraryInformation); } } /** * Returns the value of the specified column as a String. * * @param row * @param columnIndex * @return */ protected String getStringValue(HSSFRow row, Short columnIndex) { HSSFCell cell; if (columnIndex == null) { return null; } cell = row.getCell(columnIndex.shortValue()); if (cell == null) { return null; } switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_BLANK: return null; case HSSFCell.CELL_TYPE_NUMERIC: /* For now - convert to long */ return Long.toString(new Double(cell.getNumericCellValue()).longValue()); case HSSFCell.CELL_TYPE_STRING: return cell.getStringCellValue(); default: return null; } } /** * Return the value of the cell as a long. * * @param cell * @return */ protected Long getLongValue(HSSFRow row, Short columnIndex) { String value; HSSFCell cell; if (columnIndex == null) { return null; } cell = row.getCell(columnIndex.shortValue()); if (cell == null) { return null; } switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_BLANK: return null; case HSSFCell.CELL_TYPE_NUMERIC: return new Long(new Double(cell.getNumericCellValue()).longValue()); case HSSFCell.CELL_TYPE_STRING: value = cell.getStringCellValue(); if (value == null) { return null; } else { try { return new Long(value); } catch (NumberFormatException nfe) { } } default: return null; } } /** * Converts the library information extracted into domain objects. */ protected void convertToDomainObjects() { LibraryInformation libraryInformation; if (organisationUnitService == null) { logger.error("No organisationUnitService in convertToDomainObjects"); return; } if (libraryInformationList == null || libraryInformationList.isEmpty()) { logger.debug("No LibraryInformation to convert"); return; } organisationalUnits = new ArrayList(); logger.info("Saving data for " + libraryInformationList.size() + " units."); for (int i = 0; i < libraryInformationList.size(); i++) { libraryInformation = (LibraryInformation) libraryInformationList.get(i); if (libraryInformation == null) { logger.error("Empty libraryInformation"); continue; } Map resultMap = libraryInformation.resultMap; if (resultMap.size() > 0) { /* Save last years values */ if (libraryInformation.getOrganisationUnitId() != null) { logger.debug("Organisation had an ID."); String organisationId = libraryInformation.getOrganisationUnitId(); OrganisationUnit orgUnit = null; logger.debug("Finding organisation with ID: " + organisationId + " and type: " + libraryInformation.organisationType); // TODO FIX THIS BEFOR USE /* try { orgUnit = organisationUnitService .getByOrganisationTypeAndId( libraryInformation.organisationType, organisationId); } catch (ObjectNotFoundException e) { logger.error("Can't find "+organisationType +" organisation with ID " +organisationId); continue; } */ if (organisationId == null || orgUnit == null || orgUnit.getId() == null) { logger.error("Skipping organisation with ID 0"); continue; } logger.debug("Found organisation with ID: " + orgUnit.getId()); Report report = asService.getReport(orgUnit.getId().longValue(), schemaShortName, 2003, true); Iterator resultIterator = resultMap.keySet().iterator(); while (resultIterator.hasNext()) { String key = (String) resultIterator.next(); String value = (String) resultMap.get(key); Answer answer = asService.getAnswerByFieldName(report, key); if (answer == null) { logger.error( "Trying to save value for nonexsistant field " + key + " in schema fagbib."); } else { answer.setValue(value); } } asService.saveReport(report); if (i > 0 && i % 10 == 0) { logger.info("Processed " + i + " units."); } } else { logger.error("Report has no organisation ID, won't save."); logger.error(" Organisation name: " + libraryInformation.organizationName); } } } } /** * Return the name of the organization having the specified code type. * @param code * @return */ protected String getOrganisationTypeName(Object code) { Object value; if (organisationTypeNameMap == null || organisationTypeNameMap.isEmpty()) { return getDefaultOrganisationTypeName(); } value = organisationTypeNameMap.get(code.toString()); if (value != null) { return value.toString(); } else { return getDefaultOrganisationTypeName(); } } /** * Loads the Excel document. */ protected void loadExcelDocument() { File file; POIFSFileSystem poifsFileSystem = null; if (excelFileName == null) { logger.error("Can't parse Excel document. No filename specified"); throw new IllegalStateException("Can't parse Excel document. No filename specified"); } logger.debug("Will load Excel document having filename '" + excelFileName + "'"); file = new File(excelFileName); if (file.exists() == false) { logger.error( "Can't load Excel document having filename '" + excelFileName + "'. The file does not exist"); throw new IllegalArgumentException( "Can't parse Excel document. File " + excelFileName + " does not exist"); } if (file.canRead() == false) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. No read access."); throw new IllegalArgumentException("Can't parse Excel document. No access"); } try { poifsFileSystem = new POIFSFileSystem(new FileInputStream(file)); } catch (IOException e) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. Failed when reading file : '" + e.getMessage() + "'", e); throw new IllegalStateException("Can't parse Excel document. Failed to read."); } try { workBook = new HSSFWorkbook(poifsFileSystem); } catch (IOException e) { logger.error("Can't load Excel document having filename '" + excelFileName + "'. Failed when parsing file : '" + e.getMessage() + "'", e); throw new IllegalStateException("Can't parse Excel document. Failed to parse Excel data."); } logger.debug("Successfully loaded and parsed Excel document"); } public String getExcelFileName() { return excelFileName; } /** * Sets the filename of the Excel document. * This method will trigger the conversion. * * @param excelFileName */ public void setExcelFileName(String excelFileName) { this.excelFileName = excelFileName; loadAndConvert(); } public String getSheetName() { return sheetName; } /** * Sets the name of the sheet that holds the data. * * @param sheetName name of sheet, defaulted to Alt * @see sheetName */ public void setSheetName(String sheetName) { this.sheetName = sheetName; } public Short getOrganisationUnitIdColumnIdx() { return organisationUnitIdColumnIdx; } /** * Specified the 0-based index of the column holding the organisationUnitId. * @param organisationUnitIdColumnIdx */ public void setOrganisationUnitIdColumnIdx(Short organisationUnitIdColumnIdx) { this.organisationUnitIdColumnIdx = organisationUnitIdColumnIdx; } public Short getOrganisactionNameColumnIdx() { return organisactionNameColumnIdx; } /** * Specified the 0-based index of the column holding the organisation name. * * @param organisactionNameColumnIdx */ public void setOrganisactionNameColumnIdx(Short organisactionNameColumnIdx) { this.organisactionNameColumnIdx = organisactionNameColumnIdx; } public Short getPostCodeNumberColumnIdx() { return postCodeNumberColumnIdx; } /** * Specified the 0-based index of the column holding the post code number. * * @param organisactionNameColumnIdx */ public void setPostCodeNumberColumnIdx(Short postCodeNumberColumnIdx) { this.postCodeNumberColumnIdx = postCodeNumberColumnIdx; } public Short getPostCodeNameColumnIdx() { return postCodeNameColumnIdx; } /** * Specified the 0-based index of the column holding the post code name. * * @param organisactionNameColumnIdx */ public void setPostCodeNameColumnIdx(Short postCodeNameColumnIdx) { this.postCodeNameColumnIdx = postCodeNameColumnIdx; } public Short getLibraryCodeColumnIdx() { return libraryCodeColumnIdx; } /** * Specified the 0-based index of the column holding the library code. * * @param organisactionNameColumnIdx */ public void setLibraryCodeColumnIdx(Short libraryCodeColumnIdx) { this.libraryCodeColumnIdx = libraryCodeColumnIdx; } public Short getAddressColumnIdx() { return addressColumnIdx; } public void setAddressColumnIdx(Short addressColumnIdx) { this.addressColumnIdx = addressColumnIdx; } /** * Returns the collection of OrganisationUnit instances that has been converted. * * @return */ public ArrayList getOrganisationalUnits() { return organisationalUnits; } public String getLanguageCode() { return languageCode; } public void setLanguageCode(String languageCode) { this.languageCode = languageCode; } public String getLanguageName() { return languageName; } public void setLanguageName(String languageName) { this.languageName = languageName; } public String getCountryCode() { return countryCode; } public void setCountryCode(String countryCode) { this.countryCode = countryCode; } public String getCountryName() { return countryName; } public void setCountryName(String countryName) { this.countryName = countryName; } public Map getOrganisationTypeNameMap() { return organisationTypeNameMap; } public void setOrganisationTypeNameMap(Map organisationTypeNameMap) { this.organisationTypeNameMap = organisationTypeNameMap; } public String getDefaultOrganisationTypeName() { return defaultOrganisationTypeName; } public void setDefaultOrganisationTypeName(String defaultOrganisationTypeName) { this.defaultOrganisationTypeName = defaultOrganisationTypeName; } /** * Represents information extracted from one row. * For temporary use before converting into the domain objects. */ class LibraryInformation { public String organisationType; private String organisationUnitId; private String organizationName; private String postCodeNumber; private String postCodeName; private String address; private Long libraryCode; private Map resultMap; public String getOrganisationUnitId() { return organisationUnitId; } public void setOrganisationUnitId(String organisationUnitId) { this.organisationUnitId = organisationUnitId; } public String getOrganizationName() { return organizationName; } public void setOrganizationName(String organizationName) { this.organizationName = organizationName; } public String getPostCodeNumber() { return postCodeNumber; } public void setPostCodeNumber(String postCodeNumber) { this.postCodeNumber = postCodeNumber; } public String getPostCodeName() { return postCodeName; } public void setPostCodeName(String postCodeName) { this.postCodeName = postCodeName; } public String getAddress() { return address; } public void setAddress(String address) { this.address = address; } public Long getLibraryCode() { return libraryCode; } public void setLibraryCode(Long libraryCode) { this.libraryCode = libraryCode; } public void addResultMap(Map resultMap) { this.resultMap = resultMap; } /** * Provided just for debugging. * * @return */ public String toString() { return "LibraryInformation{" + "organisationUnitId=" + organisationUnitId + ", organizationName='" + organizationName + "'" + ", postCodeNumber='" + postCodeNumber + "'" + ", postCodeName='" + postCodeName + "'" + ", address='" + address + "'" + ", libraryCode=" + libraryCode + ", ResultMap=" + resultMap + "}"; } } public AnnualStatisticService getAnnualStatisticService() { return asService; } public void setAnnualStatisticService(AnnualStatisticService service) { asService = service; } public void setOrganisationUnitService(OrganisationUnitService organisationUnitService) { this.organisationUnitService = organisationUnitService; } /** * getSchemaShortName. * * @return Returns the organisationShortName. */ public String getSchemaShortName() { return schemaShortName; } /** * setSchemaShortName. * * @param organisationShortName The organisationShortName to set. */ public void setSchemaShortName(String organisationShortName) { this.schemaShortName = organisationShortName; } public String getOrganisationType() { return organisationType; } public void setOrganisationType(String organisationType) { this.organisationType = organisationType; } public void setFieldLine(int fieldLine) { logger.debug("Setting fieldLine to " + fieldLine); this.fieldLine = fieldLine; } public void setFirstDataLine(int firstDataLine) { this.firstDataLine = firstDataLine; } }