at.tuwien.ifs.somtoolbox.data.ESOMInputData.java Source code

Java tutorial

Introduction

Here is the source code for at.tuwien.ifs.somtoolbox.data.ESOMInputData.java

Source

/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.data;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.logging.Logger;

import org.apache.commons.lang.ArrayUtils;

import at.tuwien.ifs.somtoolbox.util.FileUtils;
import at.tuwien.ifs.somtoolbox.util.StringUtils;

/**
 * Reads an input data file in the ESOM format. For details on the file-format specification, see
 * http://databionic-esom.sourceforge.net/user.html#File_formats.
 * 
 * @author Rudolf Mayer
 * @version $Id: ESOMInputData.java 3358 2010-02-11 14:35:07Z mayer $
 */
public class ESOMInputData extends SOMLibSparseInputData {

    /**
     * Passes the given file name on to the superclass constructor.
     * 
     * @param vectorFileName name of the ESOM input data file
     */
    public ESOMInputData(String vectorFileName) {
        super(vectorFileName);
    }

    /**
     * Reads an ESOM input data file. The expected layout, after any leading comment lines, is:
     * <ol>
     * <li>a header line holding the number of vectors (first character is skipped, the rest is parsed as an integer)</li>
     * <li>a header line holding the number of columns; one is subtracted for the index/label column</li>
     * <li>a line with the column types (currently read but not interpreted)</li>
     * <li>a line with the component names, from which the template vector is built</li>
     * <li>the data lines: first field is the label, the following <code>dim</code> fields are the values</li>
     * </ol>
     * Empty data lines are skipped. The reader is always closed before this method returns or throws.
     * 
     * @param vectorFileName name of the file to read
     * @param sparse not used by this implementation
     * @throws IllegalArgumentException if the file cannot be read or does not match the expected layout; the
     *             original exception is attached as the cause
     */
    @Override
    protected void readVectorFile(String vectorFileName, boolean sparse) {
        BufferedReader br = null;
        try {
            br = FileUtils.openFile("ESOM input data file", vectorFileName);
            // ignore comment lines
            String line = FileUtils.consumeHeaderComments(br);

            // first line: numVectors
            numVectors = parseHeaderNumber(line, "number of vectors");
            // second line: dimensionality. Also includes the index/label field, thus we store the value -1
            dim = parseHeaderNumber(br.readLine(), "number of columns") - 1;

            initDataStructures(false);

            // third line - column types
            // TODO: process it
            line = br.readLine();

            // fourth line - component names => construct a template vector
            line = br.readLine();
            if (line == null) {
                throw new IllegalArgumentException("Unexpected end of file while reading the component names");
            }
            String[] componentNames = line.split(StringUtils.REGEX_SPACE_OR_TAB);
            templateVector = new SOMLibTemplateVector(numVectors, (String[]) ArrayUtils.remove(componentNames, 0));

            // all the other lines are data
            int index = 0;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                String[] lineElements = line.split(StringUtils.REGEX_SPACE_OR_TAB);
                // The label plus 'dim' values are accessed below; fail with a readable message instead of an
                // ArrayIndexOutOfBoundsException when the line is too short.
                if (lineElements.length < dim + 1) {
                    throw new IllegalArgumentException("Data line " + (index + 1) + " has " + lineElements.length
                            + " fields, but at least " + (dim + 1) + " are required");
                }
                for (int ve = 0; ve < dim; ve++) {
                    setMatrixValue(index, ve, parseDouble(lineElements[ve + 1]));
                }
                addInstance(index, lineElements[0]);
                index++;
            }

        } catch (Exception e) {
            Logger.getLogger("at.tuwien.ifs.somtoolbox").severe(ERROR_MESSAGE_FILE_FORMAT_CORRUPT);
            e.printStackTrace();
            // keep the original exception as cause so callers can see what actually went wrong
            throw new IllegalArgumentException(e.getMessage(), e);
        } finally {
            closeQuietly(br);
        }
        Logger.getLogger("at.tuwien.ifs.somtoolbox").info("ESOM vector file seems to be correct. Riding on ...");
    }

    /**
     * Parses the integer from a header line: the line is trimmed, its first character is dropped, and the trimmed
     * remainder is parsed.
     * 
     * @param headerLine the raw header line, may be <code>null</code> if the file ended prematurely
     * @param description what the value stands for, used in error messages only
     * @return the parsed value
     * @throws IllegalArgumentException if the line is missing or holds no value (a non-numeric value results in a
     *             {@link NumberFormatException}, which is a subclass)
     */
    private static int parseHeaderNumber(String headerLine, String description) {
        if (headerLine == null) {
            throw new IllegalArgumentException("Unexpected end of file while reading the " + description);
        }
        String trimmed = headerLine.trim();
        if (trimmed.length() < 2) {
            throw new IllegalArgumentException("Header line '" + headerLine + "' contains no value for the "
                    + description);
        }
        return Integer.parseInt(trimmed.substring(1).trim());
    }

    /**
     * Closes the given reader, ignoring a <code>null</code> argument and any {@link IOException} raised on close.
     * 
     * @param reader the reader to close, may be <code>null</code>
     */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // the reader was only used for reading; a failure to close must not hide the parsing outcome
        }
    }

    /**
     * @return the file name suffix <code>".esom"</code>
     */
    public static String getFileNameSuffix() {
        return ".esom";
    }

    /**
     * @return the format name <code>"ESOM"</code>
     */
    public static String getFormatName() {
        return "ESOM";
    }

}