ambit.io.IteratingXLSReader.java Source code

Java tutorial

Introduction

Here is the source code for ambit.io.IteratingXLSReader.java

Source

/*
Copyright (C) 2005-2006  
    
Contact: nina@acad.bg
    
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2.1
of the License, or (at your option) any later version.
All we ask is that proper credit is given for our work, which includes
- but is not limited to - adding the above copyright notice to the beginning
of your source code files, and to any copyright notice that you may distribute
with programs based on this work.
    
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Lesser General Public License for more details.
    
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
*/

package ambit.io;

import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.TreeMap;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.exception.InvalidSmilesException;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.setting.IOSetting;

import ambit.exceptions.AmbitIOException;
import ambit.misc.AmbitCONSTANTS;

/**
 * Iterating reader for Microsoft Excel (*.xls) workbooks, built on the Apache POI HSSF model.
 * <p>
 * The constructor loads the complete workbook in memory (inefficient for big files), selects
 * one sheet and consumes its leading header row(s). Every following row is returned by
 * {@link #next()} as a {@link Molecule} whose properties map the column header to the text
 * of the corresponding cell.
 * 
 * TODO find how to read it without loading into memory.
 * @author Nina Jeliazkova nina@acad.bg
 * <b>Modified</b> Aug 31, 2006
 */
public class IteratingXLSReader extends IteratingFilesWithHeaderReader {
    /** Workbook parsed from {@link #input}; <code>null</code> after {@link #close()}. */
    protected HSSFWorkbook workbook;
    /** Sheet selected in the constructor; <code>null</code> after {@link #close()}. */
    protected HSSFSheet sheet;
    /** Row iterator over {@link #sheet}; <code>null</code> after {@link #close()}. */
    protected Iterator iterator;
    /** Stream the workbook was read from; closed and set to <code>null</code> by {@link #close()}. */
    protected InputStream input;
    /** Settings derived from the header; created lazily on the first {@link #hasNext()} call. */
    protected IOSetting[] headerOptions = null;

    /**
     * Loads the workbook from the stream and reads the header row(s) of the given sheet.
     *
     * @param input      stream with the content of an *.xls file
     * @param sheetIndex index of the sheet to read, as understood by
     *                   <code>HSSFWorkbook.getSheetAt</code>
     * @throws AmbitIOException wrapping any exception raised while loading the workbook,
     *                          selecting the sheet or reading the header rows (this includes
     *                          a sheet that has fewer rows than expected header lines)
     */
    public IteratingXLSReader(InputStream input, int sheetIndex) throws AmbitIOException {
        super();
        try {
            this.input = input;
            workbook = new HSSFWorkbook(input);
            sheet = workbook.getSheetAt(sheetIndex);
            iterator = sheet.rowIterator();

            // The first row is always consumed as a header row, whatever
            // getNumberOfHeaderLines() reports.
            processHeader((HSSFRow) iterator.next());
            // Remaining header rows are consumed through processHeader as well, so their
            // cells overwrite the column names collected from the rows before them.
            for (int i = 1; i < getNumberOfHeaderLines(); i++) {
                processHeader((HSSFRow) iterator.next());
            }
        } catch (Exception x) {
            throw new AmbitIOException(x);
        }
    }

    /**
     * Closes the underlying stream and drops the references to the workbook.
     * Calling this method on an already closed reader has no effect.
     *
     * @throws IOException if closing the stream fails; the references are released anyway
     */
    public void close() throws IOException {
        try {
            if (input != null) {
                input.close();
            }
        } finally {
            // Release the in-memory workbook even when the stream refuses to close.
            input = null;
            iterator = null;
            sheet = null;
            workbook = null;
        }
    }

    /**
     * Tells whether another row is available. On the first call the header options are
     * created from the header and an IO setting question is fired for each of them.
     *
     * @return <code>true</code> if there is a further row; <code>false</code> if there is
     *         none, if the reader has been closed, or if an exception occurred (it is logged)
     */
    public boolean hasNext() {
        try {
            if (headerOptions == null) {
                headerOptions = setHeaderOptions(header);
                for (int i = 0; i < headerOptions.length; i++) {
                    fireIOSettingQuestion(headerOptions[i]);
                }
            }
            // iterator is null once close() has been called
            return iterator != null && iterator.hasNext();
        } catch (Exception x) {
            logger.error(x);
            return false;
        }
    }

    /**
     * Reads the next row and converts it into a molecule.
     * <p>
     * The cell in the SMILES column is parsed into the molecule structure; if it is not a
     * valid SMILES, an empty molecule is used and the SMILES property is set to
     * "Invalid SMILES". Every other cell is stored as a property named after its column
     * header. Cells located in a column that has no header entry are skipped with a warning.
     * Formula cells are not evaluated: the value stored is whatever
     * <code>HSSFCell.toString()</code> yields for them.
     *
     * @return a {@link Molecule} carrying the row values as properties, or <code>null</code>
     *         if the row could not be read (the exception is logged)
     */
    public Object next() {
        Molecule mol = null;
        try {
            HSSFRow row = (HSSFRow) iterator.next();
            Iterator cols = row.cellIterator();
            Hashtable properties = new Hashtable();

            while (cols.hasNext()) {
                HSSFCell cell = (HSSFCell) cols.next();
                int column = cell.getCellNum();
                // TODO evaluate formula cells instead of relying on toString()
                String value = cell.toString();

                if (smilesIndex == column) {
                    try {
                        mol = sp.parseSmiles(value);
                        properties.put(AmbitCONSTANTS.SMILES, value);
                    } catch (InvalidSmilesException x) {
                        logger.warn("Invalid SMILES!\t" + value);
                        properties.put(AmbitCONSTANTS.SMILES, "Invalid SMILES");
                    }
                } else if (column < header.size()) {
                    properties.put(header.get(column).toString(), value);
                } else {
                    // A cell to the right of the last header column used to raise an
                    // IndexOutOfBoundsException that discarded the properties of the whole row.
                    logger.warn("No header for column " + column + ", value skipped\t" + value);
                }
            }
            if (mol == null) {
                mol = new Molecule();
            }
            mol.setProperties(properties);
        } catch (Exception x) {
            logger.error(x);
        }
        return mol;
    }

    /**
     * Stores the string value of every cell of the row as the header of its column, padding
     * the header list with empty strings for columns that have no cell. The column whose
     * value equals <code>defaultSMILESHeader</code> is remembered as the SMILES column.
     *
     * @param row a header row; the cell values are read with
     *            <code>HSSFCell.getStringCellValue()</code>, whose exceptions (if any, e.g.
     *            for non-text cells - verify against the POI version in use) are propagated
     */
    protected void processHeader(HSSFRow row) {
        Iterator cols = row.cellIterator();
        // Sorted by column index so that the header list is padded in ascending order.
        TreeMap columns = new TreeMap();
        while (cols.hasNext()) {
            HSSFCell cell = (HSSFCell) cols.next();
            String value = cell.getStringCellValue();
            if (value.equals(defaultSMILESHeader)) {
                smilesIndex = cell.getCellNum();
            }
            columns.put(Integer.valueOf(cell.getCellNum()), value);
        }

        Iterator keys = columns.keySet().iterator();
        while (keys.hasNext()) {
            Integer key = (Integer) keys.next();
            int index = key.intValue();
            // index is zero based, so index + 1 entries are needed
            header.ensureCapacity(index + 1);
            while (index >= header.size()) {
                header.add("");
            }
            header.set(index, columns.get(key));
        }
    }

    /**
     * @return a short human readable description of this reader
     */
    public String toString() {
        return "Reads Microsoft Office Excel file (*.xls) ";
    }

    /* (non-Javadoc)
      * @see org.openscience.cdk.io.IChemObjectIO#getFormat()
      */
    public IResourceFormat getFormat() {
        return new XLSFileFormat();
    }
}