mulan.classifier.neural.NormalizationFilter.java Source code

Java tutorial

Introduction

Here is the source code for mulan.classifier.neural.NormalizationFilter.java

Source

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Copyright (C) 2009-2010 Aristotle University of Thessaloniki, Thessaloniki, Greece
 */

package mulan.classifier.neural;

import java.io.Serializable;
import java.util.Hashtable;
import java.util.Map;
import java.util.Set;

import mulan.data.MultiLabelInstances;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.experiment.Stats;

/**
 * Performs a min-max normalization of numeric attributes of the data set.
 * It is initialized based on given {@link MultiLabelInstances} data set and then can
 * be used to normalize {@link Instance} instances which conform to the format of the
 * data set the {@link NormalizationFilter} was initialized from.
 * <p>
 * For every numeric attribute among the feature indices reported by the data set, the
 * minimum and maximum observed at initialization time are remembered. A value
 * {@code v} of such an attribute is then rescaled to
 * {@code (v - attMin) / (attMax - attMin) * (maxValue - minValue) + minValue}.
 * Attributes whose remembered minimum equals their maximum are mapped to
 * {@code minValue}. Missing values are left untouched.
 *
 * @author Jozef Vilcek
 */
public class NormalizationFilter implements Serializable {

    /** Serial UID for serialization */
    private static final long serialVersionUID = -2575012048861337275L;
    /** Upper bound of the target normalization range. */
    private final double maxValue;
    /** Lower bound of the target normalization range. */
    private final double minValue;
    /** Maps an attribute index to a two element array <code>{min, max}</code> of that attribute. */
    private final Hashtable<Integer, double[]> attStats;

    /**
     * Creates a new instance of {@link NormalizationFilter} class for given data set.
     *
     * @param mlData the {@link MultiLabelInstances} data set from which normalizer
     * should be initialized.
     * @param performNormalization indicates whether normalization of instances contained
     * in the data set used for initialization should be performed
     * @param minValue the minimum value of the normalization range for numerical attributes
     * @param maxValue the maximum value of the normalization range for numerical attributes
     * @throws IllegalArgumentException if {@code mlData} is null, if either bound is NaN
     * or infinite, or if {@code maxValue} is not greater than {@code minValue}
     */
    public NormalizationFilter(MultiLabelInstances mlData, boolean performNormalization, double minValue,
            double maxValue) {
        if (mlData == null) {
            throw new IllegalArgumentException("Parameter 'mlData' is null.");
        }
        // A plain 'maxValue <= minValue' test is false whenever a bound is NaN, which would
        // let an unusable range through; non-finite bounds are therefore rejected explicitly.
        boolean finiteBounds = !Double.isNaN(minValue) && !Double.isInfinite(minValue)
                && !Double.isNaN(maxValue) && !Double.isInfinite(maxValue);
        if (!finiteBounds || maxValue <= minValue) {
            throw new IllegalArgumentException(String.format(
                    "Parameters 'minValue=%f' and 'maxValue=%f' does not define valid range.", minValue, maxValue));
        }

        this.minValue = minValue;
        this.maxValue = maxValue;
        this.attStats = new Hashtable<Integer, double[]>();

        // Statistics must be collected before any instance is rescaled, otherwise the
        // remembered minimum/maximum would describe already normalized data.
        collectAttributeRanges(mlData);

        if (performNormalization) {
            Instances instances = mlData.getDataSet();
            int numInstance = instances.numInstances();
            for (int i = 0; i < numInstance; i++) {
                normalize(instances.instance(i));
            }
        }
    }

    /**
     * Creates a new instance of {@link NormalizationFilter} class for given data set.
     * The normalizer will be initialized to perform normalization to the default
     * range [-1, 1].
     *
     * @param mlData the {@link MultiLabelInstances} data set from which normalizer
     * should be initialized.
     * @param performNormalization indicates whether normalization of instances contained
     * in the data set used for initialization should be performed
     * @throws IllegalArgumentException if {@code mlData} is null
     */
    public NormalizationFilter(MultiLabelInstances mlData, boolean performNormalization) {
        this(mlData, performNormalization, -1, 1);
    }

    /**
     * Performs a normalization of numerical attributes on given instance.
     * The instance must conform to format of instances data the {@link NormalizationFilter}
     * was initialized with. The values are written back to the passed instance.
     * <p>
     * Missing values are not modified. A value of an attribute whose remembered minimum
     * equals its maximum is set to the lower bound of the normalization range.
     *
     * @param instance the instance to be normalized
     */
    public void normalize(Instance instance) {
        for (Map.Entry<Integer, double[]> entry : attStats.entrySet()) {
            int attIndex = entry.getKey();
            // Keep missing values missing. Without this guard a missing value of a
            // constant attribute would silently be replaced by 'minValue', while a missing
            // value of any other attribute would stay missing.
            if (instance.isMissing(attIndex)) {
                continue;
            }

            double[] range = entry.getValue();
            double attMin = range[0];
            double attMax = range[1];
            if (attMin == attMax) {
                // Zero-width range: the scaling formula would divide by zero.
                instance.setValue(attIndex, minValue);
            } else {
                double value = instance.value(attIndex);
                instance.setValue(attIndex,
                        (((value - attMin) / (attMax - attMin)) * (maxValue - minValue)) + minValue);
            }
        }
    }

    /**
     * Records the minimum and maximum of every numeric attribute found among the feature
     * indices of the given data set. Non-numeric attributes are ignored and therefore
     * never touched by {@link #normalize(Instance)}.
     *
     * @param mlData the data set to collect the attribute ranges from
     */
    private void collectAttributeRanges(MultiLabelInstances mlData) {
        Instances dataSet = mlData.getDataSet();
        int[] featureIndices = mlData.getFeatureIndices();
        for (int attIndex : featureIndices) {
            Attribute feature = dataSet.attribute(attIndex);
            if (feature.isNumeric()) {
                Stats stats = dataSet.attributeStats(attIndex).numericStats;
                attStats.put(attIndex, new double[] { stats.min, stats.max });
            }
        }
    }
}