org.opentox.qsar.processors.trainers.regression.WekaRegressor.java Source code

Java tutorial

Introduction

Here is the source code for org.opentox.qsar.processors.trainers.regression.WekaRegressor.java

Source

/*
 *
 * YAQP - Yet Another QSAR Project:
 * Machine Learning algorithms designed for the prediction of toxicological
 * features of chemical compounds become available on the Web. Yaqp is developed
 * under OpenTox (http://opentox.org) which is an FP7-funded EU research project.
 * This project was developed at the Automatic Control Lab in the Chemical Engineering
 * School of the National Technical University of Athens. Please read README for more
 * information.
 *
 * Copyright (C) 2009-2010 Pantelis Sopasakis & Charalampos Chomenides
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contact:
 * Pantelis Sopasakis
 * chvng@mail.ntua.gr
 * Address: Iroon Politechniou St. 9, Zografou, Athens Greece
 * tel. +30 210 7723236
 */

package org.opentox.qsar.processors.trainers.regression;

import java.util.Map;
import org.opentox.core.exceptions.Cause;
import org.opentox.ontology.components.QSARModel;
import org.opentox.ontology.util.AlgorithmParameter;
import org.opentox.qsar.exceptions.QSARException;
import org.opentox.qsar.processors.filters.AttributeCleanup;
import org.opentox.qsar.processors.filters.AttributeCleanup.ATTRIBUTE_TYPE;
import org.opentox.qsar.processors.filters.SimpleMVHFilter;
import org.opentox.qsar.processors.trainers.AbstractTrainer;
import org.opentox.qsar.processors.trainers.WekaTrainer;
import org.opentox.www.rest.components.YaqpForm;
import weka.core.Attribute;
import weka.core.Instances;

/**
 *
 * @author Pantelis Sopasakis
 * @author Charalampos Chomenides
 */
public abstract class WekaRegressor extends WekaTrainer {

    public WekaRegressor(final YaqpForm form) throws QSARException {
        super(form);
    }

    public WekaRegressor(final Map<String, AlgorithmParameter> parameters) throws QSARException {
        super(parameters);
    }

    public WekaRegressor() {
        super();
    }

    @Override
    public Instances preprocessData(Instances data) throws QSARException {
        /* Check if data == null*/
        if (data == null)
            throw new NullPointerException("Trainers do not accept null datasets.");

        /* The incoming dataset always has the first attribute set to
        'compound_uri' which is of type "String". This is removed at the
        begining of the training procedure */
        AttributeCleanup filter = new AttributeCleanup(ATTRIBUTE_TYPE.string);
        // NOTE: Removal of string attributes should be always performed prior to any kind of training!
        data = filter.filter(data);
        SimpleMVHFilter fil = new SimpleMVHFilter();
        data = fil.filter(data);

        /*
         * Do some checks for the prediction feature...
         */
        // CHECK IF THE PREDICTION FEATURE EXISTS
        // IF IT DOESN'T PROVIDE A LIST OF SOME NUMERIC FEATURES IN THE DATASET
        Attribute classAttribute = data.attribute(predictionFeature);
        if (classAttribute == null) {
            String message = "The prediction feature you provided is is not included in the  dataset :{"
                    + predictionFeature + "}. " + attributeHint(data);

            throw new QSARException(Cause.XQReg202, message);
        }

        // CHECK IF THE PREDICTION FEATURE IS NUMERIC:
        // IF IT DOESN'T PROVIDE A LIST OF SOME NUMERIC FEATURES IN THE DATASET
        if (classAttribute.type() != Attribute.NUMERIC) {
            String message = "The prediction feature you provided is not numeric : " + "{" + predictionFeature
                    + "}. " + attributeHint(data);
            throw new QSARException(Cause.XQReg203, message);
        }

        data.setClass(data.attribute(predictionFeature.toString()));

        return data;

    }

    private String attributeHint(Instances data) {
        final String NEWLINE = "\n";
        String hint = "Available features :" + NEWLINE;
        final int MAX_ROWS = 5;
        int jndex = 0;
        Attribute temp;
        for (int i = 0; i < data.numAttributes() && jndex < MAX_ROWS; i++) {
            temp = data.attribute(i);
            if (temp.type() == Attribute.NUMERIC) {
                hint += temp.name() + NEWLINE;
                jndex++;
            }
        }
        return hint;
    }

}