no.abmu.abmstatistikk.annualstatistic.util.ExcelWithLibraryInformationAndDataParser.java Source code

Java tutorial

Introduction

Here is the source code for no.abmu.abmstatistikk.annualstatistic.util.ExcelWithLibraryInformationAndDataParser.java

Source

/*$Id: ExcelWithLibraryInformationAndDataParser.java 12167 2008-12-08 10:37:10Z jens $*/
/*
 ****************************************************************************
 *                                                                          *
 *                   (c) Copyright 2004 ABM-utvikling                       *
 *                                                                          *
 * This program is free software; you can redistribute it and/or modify it  *
 * under the terms of the GNU General Public License as published by the    *
 * Free Software Foundation; either version 2 of the License, or (at your   *
 * option) any later version.                                               *
 *                                                                          *
 * This program is distributed in the hope that it will be useful, but      *
 * WITHOUT ANY WARRANTY; without even the implied warranty of               *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General *
 * Public License for more details. http://www.gnu.org/licenses/gpl.html    *
 *                                                                          *
 ****************************************************************************
 */

package no.abmu.abmstatistikk.annualstatistic.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import no.abmu.abmstatistikk.annualstatistic.domain.Answer;
import no.abmu.abmstatistikk.annualstatistic.domain.Report;
import no.abmu.abmstatistikk.annualstatistic.service.AnnualStatisticService;
import no.abmu.organisationregister.domain.OrganisationTypeNameConst;
import no.abmu.organisationregister.domain.OrganisationUnit;
import no.abmu.organisationregister.service.OrganisationUnitService;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
 * This class parses Excel spreadsheets with library information.
 * The main purpose of the class is to populate the organisation register
 * for testing and for production.
 *
 * @author Andreas Dahl andread@linpro.no
 * @author $Author: jens $
 * @version $Rev: 12167 $
 * @date $Date: 2008-12-08 11:37:10 +0100 (Mon, 08 Dec 2008) $
 * @copyright ABM-Utvikling
 */
public class ExcelWithLibraryInformationAndDataParser {
    private final Logger logger = Logger.getLogger(ExcelWithLibraryInformationAndDataParser.class);

    private AnnualStatisticService asService;

    /**
     * Represents the parsed Excel document.
     */
    private HSSFWorkbook workBook;

    /**
     * The name of the filename to load the excel document from.
     */
    private String excelFileName;

    /**
     * The default language code to use.
     */
    private String defaultLanguageCode = "NO";

    /**
     * The name of the sheet holding the data.
     */
    private String sheetName = "Alt";

    /**
     * The index of the column holding the organisationUnitId.
     */
    private Short organisationUnitIdColumnIdx = new Short((short) 0);

    /**
     * The index of the column holding the organisation name.
     */
    private Short organisactionNameColumnIdx = new Short((short) 2);

    /**
     * The index of the column holding the address.
     */
    private Short addressColumnIdx = new Short((short) 3);

    /**
     * The index of the column holding the postal code number.
     */
    private Short postCodeNumberColumnIdx = new Short((short) 4);

    /**
     * The index of the column holding the postal code name.
     */
    private Short postCodeNameColumnIdx = new Short((short) 5);

    /**
     * The index of the column holding the library code.
     */
    private Short libraryCodeColumnIdx = new Short((short) 6);

    /**
     * The code of the language of the data to be parsed, for example nor.
     */
    private String languageCode = "nor";

    /**
     * The name of the language of the data to be parsed, for example Bokml.
     */
    private String languageName = "Bokml";

    /**
     * The code of the country to import data to.
     */
    private String countryCode = "NO";

    /**
     * The name of the country to import data to.
     */
    private String countryName = "Norge";

    /**
     * The collection of libraryInformation extracted.
     */
    private ArrayList libraryInformationList;

    /**
     * The collection of converted organisational units.
     */
    private ArrayList organisationalUnits;

    /**
     * This map should hold the key - value pairs of the organistion id's and names.
     *
     * For example:
     * key value
     * 1   School library
     * 2   University library
     * ...
     */
    private Map organisationTypeNameMap;

    /**
     * The default name to use for an organization when the code is not found in the
     * map organisationTypeNameMap.
     * @see organisationTypeNameMap
     */
    private String defaultOrganisationTypeName = "Unknown";

    /**
     * The "feltnr" => "columnnr" map.
     */
    private Map feltMap = new HashMap();

    private OrganisationUnitService organisationUnitService;

    private String organisationType;

    private String schemaShortName;

    private int fieldLine = 0;

    private int firstDataLine;

    /**
     * No-arg constructor.
     */
    public ExcelWithLibraryInformationAndDataParser() {
    }

    /**
     * Does the actual conversion.
     *
     * 1. Find and load the spreadsheet<br/>
     * 2. Extract information.<br/>
     * 3. Convert into domain objects.<br/>
     */

    protected void loadAndConvert() {

        loadExcelDocument();

        extractLibraryInformation();

        convertToDomainObjects();

    }

    /**
     * Iterates the rows in the Spreadsheet data and builds up the
     * LibraryInformation instances.
     */
    protected void extractLibraryInformation() {
        HSSFSheet sheet;
        HSSFRow row;

        LibraryInformation libraryInformation;
        String organisationUnitId;

        libraryInformationList = new ArrayList();

        sheet = workBook.getSheet(getSheetName());
        if (sheet == null) {
            logger.error("Can't extract information. The Excel document does not have a sheet with name '"
                    + getSheetName() + "'");
            throw new IllegalArgumentException("Can't extract information, sheet not found");
        }
        logger.info("Reading " + sheet.getLastRowNum() + " rows.");

        for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
            row = sheet.getRow(rowIndex);
            if (row == null) {
                /* No more rows */
                break;
            }

            libraryInformation = new LibraryInformation();

            organisationUnitId = getStringValue(row, getOrganisationUnitIdColumnIdx());

            logger.debug("No OU on line " + rowIndex);

            if (rowIndex == fieldLine) {
                // Building "coloumn nr" => "Felt nr" hash;
                short max = row.getLastCellNum();
                for (short i = 0; i < max; i++) {
                    HSSFCell x = row.getCell(i);
                    if (x != null) {
                        String cellString = getStringValue(row, new Short(i));
                        if (cellString != null) {
                            Pattern p = Pattern.compile("^(Felt)?\\s*(\\d+)$");
                            Matcher m = p.matcher(cellString);
                            if (m.matches()) {
                                String fieldNumber = m.group(2);
                                if (fieldNumber.length() == 1) {
                                    fieldNumber = "00" + fieldNumber;
                                } else if (fieldNumber.length() == 2) {
                                    fieldNumber = "0" + fieldNumber;
                                }
                                logger.info("Found field " + fieldNumber);
                                feltMap.put(fieldNumber, new Short(i));
                            }
                        }
                    }
                }
                continue;
            }

            if (rowIndex < firstDataLine) {
                continue;
            }

            libraryInformation.setOrganisationUnitId(organisationUnitId);

            /* Scan all "Felt" fields */
            Map resultMap = new HashMap();
            Set keys = feltMap.keySet();
            Iterator it = keys.iterator();
            String key = null;
            while (it.hasNext()) {
                key = (String) (it.next());
                String val = getStringValue(row, (Short) feltMap.get(key));
                resultMap.put(key, val);
            }
            libraryInformation.addResultMap(resultMap);

            logger.debug("Adding resultmap: " + resultMap);
            if (organisationType.equals("Fagbibliotek")) {
                Long orgTypeNumber = getLongValue(row, new Short((short) 11));
                String fagBibType = null;
                if (orgTypeNumber != null) {
                    switch (orgTypeNumber.intValue()) {
                    case 0:
                        fagBibType = OrganisationTypeNameConst.NATIONAL_LIBRARY;
                        break;
                    case 1:
                        fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_OSLO;
                        break;
                    case 2:
                        fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_BERGEN;
                        break;
                    case 3:
                        fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_TRONDHEIM;
                        break;
                    case 4:
                        fagBibType = OrganisationTypeNameConst.UNIVERITY_OF_TROMSOE;
                        break;
                    case 5:
                        fagBibType = OrganisationTypeNameConst.SPECIALISED_UNIVERISTY_LIBRARY;
                        break;
                    case 6:
                        fagBibType = OrganisationTypeNameConst.PUBLIC_COLLEGE_LIBRARY;
                        break;
                    case 7:
                        fagBibType = OrganisationTypeNameConst.PRIVATE_COLLEGE_LIBRARY;
                        break;
                    case 8:
                        fagBibType = OrganisationTypeNameConst.PUBLIC_SPECIAL_LIBRARY;
                        break;
                    case 9:
                        fagBibType = OrganisationTypeNameConst.PRIVATE_SPECIAL_LIBRARY;
                        break;
                    default:
                        fagBibType = OrganisationTypeNameConst.FAG_LIBRARY;
                        break;
                    }
                }

                libraryInformation.organisationType = fagBibType;
            } else if (organisationType.equals("Folkebibliotek")) {
                if (organisationUnitId != null && organisationUnitId.equals("456")) {
                    libraryInformation.organisationType = "Folkebibliotek Svalbard";
                } else {
                    libraryInformation.organisationType = organisationType;
                }
            } else if (organisationType.equals("Grunnskolebibliotek")) {
                if (organisationUnitId != null && organisationUnitId.equals("3580")) {
                    libraryInformation.organisationType = "Grunnskolebibliotek Svalbard";
                } else {
                    libraryInformation.organisationType = organisationType;
                }
            } else {
                libraryInformation.organisationType = organisationType;
            }
            libraryInformationList.add(libraryInformation);
        }
    }

    /**
     * Returns the value of the specified column as a String.
     *
     * @param row
     * @param columnIndex
     * @return
     */
    protected String getStringValue(HSSFRow row, Short columnIndex) {
        HSSFCell cell;
        if (columnIndex == null) {
            return null;
        }

        cell = row.getCell(columnIndex.shortValue());

        if (cell == null) {
            return null;
        }
        switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_BLANK:
            return null;
        case HSSFCell.CELL_TYPE_NUMERIC:
            /* For now - convert to long */
            return Long.toString(new Double(cell.getNumericCellValue()).longValue());
        case HSSFCell.CELL_TYPE_STRING:
            return cell.getStringCellValue();
        default:
            return null;
        }
    }

    /**
     * Return the value of the cell as a long.
     *
     * @param cell
     * @return
     */
    protected Long getLongValue(HSSFRow row, Short columnIndex) {
        String value;
        HSSFCell cell;

        if (columnIndex == null) {
            return null;
        }

        cell = row.getCell(columnIndex.shortValue());

        if (cell == null) {
            return null;
        }

        switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_BLANK:
            return null;
        case HSSFCell.CELL_TYPE_NUMERIC:
            return new Long(new Double(cell.getNumericCellValue()).longValue());
        case HSSFCell.CELL_TYPE_STRING:
            value = cell.getStringCellValue();
            if (value == null) {
                return null;
            } else {
                try {
                    return new Long(value);
                } catch (NumberFormatException nfe) {
                }
            }
        default:
            return null;
        }
    }

    /**
     * Converts the library information extracted into domain objects.
     */
    protected void convertToDomainObjects() {
        LibraryInformation libraryInformation;
        if (organisationUnitService == null) {
            logger.error("No organisationUnitService in convertToDomainObjects");
            return;
        }

        if (libraryInformationList == null || libraryInformationList.isEmpty()) {
            logger.debug("No LibraryInformation to convert");
            return;
        }

        organisationalUnits = new ArrayList();

        logger.info("Saving data for " + libraryInformationList.size() + " units.");
        for (int i = 0; i < libraryInformationList.size(); i++) {
            libraryInformation = (LibraryInformation) libraryInformationList.get(i);
            if (libraryInformation == null) {
                logger.error("Empty libraryInformation");
                continue;
            }

            Map resultMap = libraryInformation.resultMap;
            if (resultMap.size() > 0) { /* Save last years values */
                if (libraryInformation.getOrganisationUnitId() != null) {
                    logger.debug("Organisation had an ID.");
                    String organisationId = libraryInformation.getOrganisationUnitId();
                    OrganisationUnit orgUnit = null;
                    logger.debug("Finding organisation with ID: " + organisationId + " and type: "
                            + libraryInformation.organisationType);

                    // TODO FIX THIS BEFOR USE
                    /*
                    try {
                    orgUnit 
                        = organisationUnitService
                            .getByOrganisationTypeAndId(
                                    libraryInformation.organisationType, 
                                    organisationId);
                    } catch (ObjectNotFoundException e) {
                    logger.error("Can't find "+organisationType
                                +" organisation with ID "
                                +organisationId);
                    continue;
                    }
                    */

                    if (organisationId == null || orgUnit == null || orgUnit.getId() == null) {
                        logger.error("Skipping organisation with ID 0");
                        continue;
                    }
                    logger.debug("Found organisation with ID: " + orgUnit.getId());

                    Report report = asService.getReport(orgUnit.getId().longValue(), schemaShortName, 2003, true);
                    Iterator resultIterator = resultMap.keySet().iterator();
                    while (resultIterator.hasNext()) {
                        String key = (String) resultIterator.next();
                        String value = (String) resultMap.get(key);
                        Answer answer = asService.getAnswerByFieldName(report, key);
                        if (answer == null) {
                            logger.error(
                                    "Trying to save value for nonexsistant field " + key + " in schema fagbib.");
                        } else {
                            answer.setValue(value);
                        }
                    }
                    asService.saveReport(report);
                    if (i > 0 && i % 10 == 0) {
                        logger.info("Processed " + i + " units.");
                    }
                } else {
                    logger.error("Report has no organisation ID, won't save.");
                    logger.error(" Organisation name: " + libraryInformation.organizationName);
                }
            }
        }

    }

    /**
     * Return the name of the organization having the specified code type.
     * @param code
     * @return
     */
    protected String getOrganisationTypeName(Object code) {
        Object value;
        if (organisationTypeNameMap == null || organisationTypeNameMap.isEmpty()) {
            return getDefaultOrganisationTypeName();
        }

        value = organisationTypeNameMap.get(code.toString());
        if (value != null) {
            return value.toString();
        } else {
            return getDefaultOrganisationTypeName();
        }
    }

    /**
     * Loads the Excel document.
     */
    protected void loadExcelDocument() {
        File file;
        POIFSFileSystem poifsFileSystem = null;

        if (excelFileName == null) {
            logger.error("Can't parse Excel document. No filename specified");
            throw new IllegalStateException("Can't parse Excel document. No filename specified");
        }

        logger.debug("Will load Excel document having filename '" + excelFileName + "'");

        file = new File(excelFileName);
        if (file.exists() == false) {
            logger.error(
                    "Can't load Excel document having filename '" + excelFileName + "'. The file does not exist");
            throw new IllegalArgumentException(
                    "Can't parse Excel document. File " + excelFileName + " does not exist");
        }

        if (file.canRead() == false) {
            logger.error("Can't load Excel document having filename '" + excelFileName + "'. No read access.");
            throw new IllegalArgumentException("Can't parse Excel document. No access");
        }

        try {
            poifsFileSystem = new POIFSFileSystem(new FileInputStream(file));
        } catch (IOException e) {
            logger.error("Can't load Excel document having filename '" + excelFileName
                    + "'. Failed when reading file : '" + e.getMessage() + "'", e);
            throw new IllegalStateException("Can't parse Excel document. Failed to read.");
        }

        try {
            workBook = new HSSFWorkbook(poifsFileSystem);
        } catch (IOException e) {
            logger.error("Can't load Excel document having filename '" + excelFileName
                    + "'. Failed when parsing file : '" + e.getMessage() + "'", e);
            throw new IllegalStateException("Can't parse Excel document. Failed to parse Excel data.");
        }

        logger.debug("Successfully loaded and parsed Excel document");
    }

    public String getExcelFileName() {
        return excelFileName;
    }

    /**
     * Sets the filename of the Excel document.
     * This method will trigger the conversion.
     *
     * @param excelFileName
     */
    public void setExcelFileName(String excelFileName) {
        this.excelFileName = excelFileName;

        loadAndConvert();
    }

    public String getSheetName() {
        return sheetName;
    }

    /**
     * Sets the name of the sheet that holds the data.
     *
     * @param sheetName name of sheet, defaulted to Alt
     * @see sheetName
     */
    public void setSheetName(String sheetName) {
        this.sheetName = sheetName;
    }

    public Short getOrganisationUnitIdColumnIdx() {
        return organisationUnitIdColumnIdx;
    }

    /**
     * Specified the 0-based index of the column holding the organisationUnitId.
     * @param organisationUnitIdColumnIdx
     */
    public void setOrganisationUnitIdColumnIdx(Short organisationUnitIdColumnIdx) {
        this.organisationUnitIdColumnIdx = organisationUnitIdColumnIdx;
    }

    public Short getOrganisactionNameColumnIdx() {
        return organisactionNameColumnIdx;
    }

    /**
     * Specified the 0-based index of the column holding the organisation name.
     *
     * @param organisactionNameColumnIdx
     */
    public void setOrganisactionNameColumnIdx(Short organisactionNameColumnIdx) {
        this.organisactionNameColumnIdx = organisactionNameColumnIdx;
    }

    public Short getPostCodeNumberColumnIdx() {
        return postCodeNumberColumnIdx;
    }

    /**
     * Specified the 0-based index of the column holding the post code number.
     *
     * @param organisactionNameColumnIdx
     */
    public void setPostCodeNumberColumnIdx(Short postCodeNumberColumnIdx) {
        this.postCodeNumberColumnIdx = postCodeNumberColumnIdx;
    }

    public Short getPostCodeNameColumnIdx() {
        return postCodeNameColumnIdx;
    }

    /**
     * Specified the 0-based index of the column holding the post code name.
     *
     * @param organisactionNameColumnIdx
     */

    public void setPostCodeNameColumnIdx(Short postCodeNameColumnIdx) {
        this.postCodeNameColumnIdx = postCodeNameColumnIdx;
    }

    public Short getLibraryCodeColumnIdx() {
        return libraryCodeColumnIdx;
    }

    /**
     * Specified the 0-based index of the column holding the library code.
     *
     * @param organisactionNameColumnIdx
     */
    public void setLibraryCodeColumnIdx(Short libraryCodeColumnIdx) {
        this.libraryCodeColumnIdx = libraryCodeColumnIdx;
    }

    public Short getAddressColumnIdx() {
        return addressColumnIdx;
    }

    public void setAddressColumnIdx(Short addressColumnIdx) {
        this.addressColumnIdx = addressColumnIdx;
    }

    /**
     * Returns the collection of OrganisationUnit instances that has been converted.
     *
     * @return
     */
    public ArrayList getOrganisationalUnits() {
        return organisationalUnits;
    }

    public String getLanguageCode() {
        return languageCode;
    }

    public void setLanguageCode(String languageCode) {
        this.languageCode = languageCode;
    }

    public String getLanguageName() {
        return languageName;
    }

    public void setLanguageName(String languageName) {
        this.languageName = languageName;
    }

    public String getCountryCode() {
        return countryCode;
    }

    public void setCountryCode(String countryCode) {
        this.countryCode = countryCode;
    }

    public String getCountryName() {
        return countryName;
    }

    public void setCountryName(String countryName) {
        this.countryName = countryName;
    }

    public Map getOrganisationTypeNameMap() {
        return organisationTypeNameMap;
    }

    public void setOrganisationTypeNameMap(Map organisationTypeNameMap) {
        this.organisationTypeNameMap = organisationTypeNameMap;
    }

    public String getDefaultOrganisationTypeName() {
        return defaultOrganisationTypeName;
    }

    public void setDefaultOrganisationTypeName(String defaultOrganisationTypeName) {
        this.defaultOrganisationTypeName = defaultOrganisationTypeName;
    }

    /**
     * Represents information extracted from one row.
     * For temporary use before converting into the domain objects.
     */
    class LibraryInformation {
        public String organisationType;
        private String organisationUnitId;
        private String organizationName;
        private String postCodeNumber;
        private String postCodeName;
        private String address;
        private Long libraryCode;
        private Map resultMap;

        public String getOrganisationUnitId() {
            return organisationUnitId;
        }

        public void setOrganisationUnitId(String organisationUnitId) {
            this.organisationUnitId = organisationUnitId;
        }

        public String getOrganizationName() {
            return organizationName;
        }

        public void setOrganizationName(String organizationName) {
            this.organizationName = organizationName;
        }

        public String getPostCodeNumber() {
            return postCodeNumber;
        }

        public void setPostCodeNumber(String postCodeNumber) {
            this.postCodeNumber = postCodeNumber;
        }

        public String getPostCodeName() {
            return postCodeName;
        }

        public void setPostCodeName(String postCodeName) {
            this.postCodeName = postCodeName;
        }

        public String getAddress() {
            return address;
        }

        public void setAddress(String address) {
            this.address = address;
        }

        public Long getLibraryCode() {
            return libraryCode;
        }

        public void setLibraryCode(Long libraryCode) {
            this.libraryCode = libraryCode;
        }

        public void addResultMap(Map resultMap) {
            this.resultMap = resultMap;
        }

        /**
         * Provided just for debugging.
         *
         * @return
         */
        public String toString() {
            return "LibraryInformation{" + "organisationUnitId=" + organisationUnitId + ", organizationName='"
                    + organizationName + "'" + ", postCodeNumber='" + postCodeNumber + "'" + ", postCodeName='"
                    + postCodeName + "'" + ", address='" + address + "'" + ", libraryCode=" + libraryCode
                    + ", ResultMap=" + resultMap + "}";
        }
    }

    public AnnualStatisticService getAnnualStatisticService() {
        return asService;
    }

    public void setAnnualStatisticService(AnnualStatisticService service) {
        asService = service;
    }

    public void setOrganisationUnitService(OrganisationUnitService organisationUnitService) {
        this.organisationUnitService = organisationUnitService;
    }

    /**
     * getSchemaShortName.
     * 
     * @return Returns the organisationShortName.
     */
    public String getSchemaShortName() {
        return schemaShortName;
    }

    /**
     * setSchemaShortName.
     * 
     * @param organisationShortName The organisationShortName to set.
     */
    public void setSchemaShortName(String organisationShortName) {
        this.schemaShortName = organisationShortName;
    }

    public String getOrganisationType() {
        return organisationType;
    }

    public void setOrganisationType(String organisationType) {
        this.organisationType = organisationType;
    }

    public void setFieldLine(int fieldLine) {
        logger.debug("Setting fieldLine to " + fieldLine);
        this.fieldLine = fieldLine;
    }

    public void setFirstDataLine(int firstDataLine) {
        this.firstDataLine = firstDataLine;
    }
}