no.trank.openpipe.parse.ms.ExcelParser.java Source code

Java tutorial

Introduction

Here is the source code for no.trank.openpipe.parse.ms.ExcelParser.java

Source

/*
 * Copyright 2007  T-Rank AS
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package no.trank.openpipe.parse.ms;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.Iterator;
import java.util.Map;

import no.trank.openpipe.parse.api.ParseData;
import no.trank.openpipe.parse.api.Parser;
import no.trank.openpipe.parse.api.ParserException;
import no.trank.openpipe.parse.api.ParserResult;
import no.trank.openpipe.parse.api.ParserResultImpl;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
 * Parses .xls files. Reads numeric, string and boolean type cells only;
 * cells of any other type (formula, error, blank, ...) are skipped.
 * <p>
 * Layout of the extracted text: the cells of a row are joined with a single
 * space, every row that produced text is terminated by a newline, and each
 * sheet after the first is preceded by an additional newline.
 * 
 * @version $Revision$
 */
public class ExcelParser implements Parser {
    /**
     * Reads the workbook from the input stream of {@code data} and returns its
     * text, its title and, if requested, its document properties.
     *
     * @param data supplies the input stream of the .xls document;
     *             {@code data.includeProperties()} decides whether the
     *             property map is attached to the result
     * @return a result holding the extracted text, the value of the
     *         {@code "title"} property as title and optionally all properties
     * @throws IOException if the stream cannot be read as an OLE2/.xls document
     * @throws ParserException declared by the {@link Parser} contract; not thrown
     *         directly here, but may originate from the helpers that are called
     */
    @Override
    public ParserResult parse(ParseData data) throws IOException, ParserException {
        final POIFSFileSystem fs = new POIFSFileSystem(data.getInputStream());

        final HSSFWorkbook doc = new HSSFWorkbook(fs);
        final Map<String, String> properties = POIUtils.getProperties(fs);

        final ParserResultImpl result = new ParserResultImpl();
        result.setText(getText(doc));
        result.setTitle(properties.get("title"));
        if (data.includeProperties()) {
            result.setProperties(properties);
        }

        return result;
    }

    /**
     * Concatenates the text of all readable cells of all sheets.
     *
     * @param doc the workbook to extract text from
     * @return the extracted text; empty if no cell produced any text
     */
    private String getText(final HSSFWorkbook doc) {
        final StringBuilder text = new StringBuilder();

        for (int sheetNumber = 0; sheetNumber < doc.getNumberOfSheets(); ++sheetNumber) {
            // Blank line between sheets (rows already end with a newline).
            if (sheetNumber > 0) {
                text.append('\n');
            }

            final HSSFSheet sheet = doc.getSheetAt(sheetNumber);

            // Explicit iterators + casts: kept as-is so the code also compiles
            // against POI versions whose iterators are not generified.
            for (Iterator<?> rowIterator = sheet.rowIterator(); rowIterator.hasNext();) {
                final HSSFRow row = (HSSFRow) rowIterator.next();

                boolean rowHasText = false;
                for (Iterator<?> cellIterator = row.cellIterator(); cellIterator.hasNext();) {
                    final String cellText = getCellText((HSSFCell) cellIterator.next());
                    if (cellText == null) {
                        continue;
                    }
                    if (rowHasText) {
                        text.append(' ');
                    }
                    rowHasText = true;
                    text.append(cellText);
                }
                // Rows without any readable cell leave no trace in the output.
                if (rowHasText) {
                    text.append('\n');
                }
            }
        }

        return text.toString();
    }

    /**
     * Returns the trimmed textual value of a numeric, string or boolean cell.
     *
     * @param cell the cell to read
     * @return the trimmed cell text, or {@code null} if the cell is of an
     *         unsupported type or its text is empty after trimming
     */
    private String getCellText(final HSSFCell cell) {
        final String raw;

        switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_NUMERIC:
            raw = formatNumeric(cell.getNumericCellValue());
            break;
        case HSSFCell.CELL_TYPE_STRING:
            raw = cell.getRichStringCellValue() + "";
            break;
        case HSSFCell.CELL_TYPE_BOOLEAN:
            raw = cell.getBooleanCellValue() ? "true" : "false";
            break;
        default:
            // skip formula/error cells and every other unsupported type
            return null;
        }

        final String trimmed = raw.trim();
        return trimmed.length() > 0 ? trimmed : null;
    }

    /**
     * Formats a numeric cell value as plain decimal text.
     * <p>
     * {@code Double.toString} appends ".0" to integral values and switches to
     * scientific notation for magnitudes &gt;= 10^7 or &lt; 10^-3, which turns
     * e.g. the ID 12345678 into "1.2345678E7". This method instead yields
     * "12345678", "42" for 42.0 and "0.5" for 0.5.
     *
     * @param value the numeric cell value
     * @return the value in plain notation without superfluous trailing zeros;
     *         NaN and infinities fall back to {@code Double.toString}
     */
    private static String formatNumeric(final double value) {
        // BigDecimal cannot represent NaN/Infinity and would throw.
        if (Double.isNaN(value) || Double.isInfinite(value)) {
            return Double.toString(value);
        }
        // valueOf() goes through the shortest repr of the double, so no
        // binary-fraction noise (0.1 stays "0.1") ends up in the text.
        return BigDecimal.valueOf(value).stripTrailingZeros().toPlainString();
    }
}