milk.core.Exemplar.java Source code

Java tutorial

Introduction

Here is the source code for milk.core.Exemplar.java

Source

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Exemplar.java
 *    Copyright (C) 2002 University of Waikato
 * 
 */

package milk.core;

import java.io.*;
import java.util.*;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.*;

/**
 * Class for handling an ordered set of weighted exemplars. <p>
 *
 * This class is not intended for reading directly from the Reader.
 * Instead, the data should be pre-processed and then are tried to
 * form an exemplar.
 * 
 * The following members are still useful from the Instances class
 * in the context of exemplar by using getInstances() 
 *
 * <br> attribute(int) 
 * <br> attribute(String) 
 * <br> attributeStats(int) 
 * <br> attributeToDoubleArray(int) 
 * <br> checkForStringAttributes() 
 * <br> checkInstance(Instance) 
 * <br> classAttribute() 
 * <br> classIndex() 
 * <br> compactify() 
 * <br> delete() 
 * <br> delete(int) 
 * <br> deleteWithMissing(Attribute) 
 * <br> deleteWithMissing(int)  
 * <br> enumerateInstances() 
 * <br> firstInstance() 
 * <br> instance(int) 
 * <br> lastInstance() 
 * <br> meanOrMode(Attribute) 
 * <br> meanOrMode(int) 
 * <br> numAttributes() 
 * <br> numClasses() 
 * <br> numDistinctValues(Attribute) 
 * <br> numDistinctValues(int) 
 * <br> numInstances() 
 * <br> randomize(Random) 
 * <br> renameAttribute(Attribute, String) 
 * <br> renameAttribute(int, String) 
 * <br> renameAttributeValue(Attribute, String, String) 
 * <br> renameAttributeValue(int, int, String) 
 * <br> resample(Random) 
 * <br> resampleWithWeights(Random) 
 * <br> resampleWithWeights(Random, double[]) 
 * <br> sort(Attribute) 
 * <br> sort(int) 
 * <br> stringFreeStructure()  
 * <br> sumOfWeights() 
 * <br> variance(Attribute) 
 * <br> variance(int) 
 *
 * Typical usage is to read the instances from the Reader and try to feed
 * it into into an array of Exemplars according to it's ID values 
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Xin XU (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.33 $ 
 */
public class Exemplar implements Serializable {

    /** The exemplar's ID attribute */
    private int m_IdIndex;

    /** The value of the ID of this exemplar */
    private double m_IdValue;

    /** The class index of this exemplar */
    private int m_ClassIndex;

    /** The class value of this exemplar */
    private double m_ClassValue;

    /** The instances in the exemplar*/
    private Instances m_Instances;

    /** The weight of this exemplar */
    private double m_Weight = 1;

    /**
     * Constructor using one instance to form an exemplar
     * 
     * @param instance the given instance
     * @param id the ID index
     */
    public Exemplar(Instance inst, int id) {
        m_IdIndex = id;
        m_IdValue = inst.value(id);
        m_ClassIndex = inst.classIndex();
        m_ClassValue = inst.classValue();
        m_Instances = new Instances(inst.dataset(), 1);
        m_Instances.add(inst);
    }

    /** 
     * Constructor creating an empty Exemplar with the same structure
     * of the given exemplar and the given size
     *
     * @param exemplar the given exemplar
     * @param size the given size
     */
    public Exemplar(Exemplar exemplar, int size) {
        m_IdIndex = exemplar.m_IdIndex;
        m_IdValue = exemplar.m_IdValue;
        m_ClassIndex = exemplar.m_ClassIndex;
        m_ClassValue = exemplar.m_ClassValue;
        m_Instances = new Instances(exemplar.getInstances(), size);
    }

    /**
     * Constructor to form an exemplar by copying from another exemplar
     *
     * @param exemplar the copied exemplar
     */
    public Exemplar(Exemplar exemplar) {
        m_IdIndex = exemplar.m_IdIndex;
        m_IdValue = exemplar.m_IdValue;
        m_ClassIndex = exemplar.m_ClassIndex;
        m_ClassValue = exemplar.m_ClassValue;
        m_Instances = new Instances(exemplar.m_Instances);
        m_Weight = exemplar.m_Weight;
    }

    /**
     * Constructor copying all instances and references to
     * the header information from the given set of instances.
     * Set the attribute with first index as exemplar ID
     * 
     * @param instances the set to be copied
     * @exception if the exemplar cannot be built properly
     */
    public Exemplar(Instances dataset) throws Exception {
        this(dataset, 0);
    }

    /**
     * Constructor creating an exemplar with the given dataset and the 
     * given ID index
     *
     * @param instances the instances from which the header 
     * information is to be taken
     * @param id the index of the ID of the exemplar 
     */
    public Exemplar(Instances dataset, int id) throws Exception {
        m_IdIndex = id;
        m_ClassIndex = dataset.classIndex();
        m_Instances = new Instances(dataset);

        if (!m_Instances.attribute(m_IdIndex).isNominal())
            throw new Exception("The exempler's ID is not nominal!");

        double idvalue = (m_Instances.firstInstance()).value(m_IdIndex);
        double clsvalue = (m_Instances.firstInstance()).classValue();

        // The the validity of this exemplar
        for (int i = 1; i < m_Instances.numInstances(); i++) {
            Instance inst = m_Instances.instance(i);
            if ((!Utils.eq(inst.value(m_IdIndex), idvalue))
            //|| (!Utils.eq(inst.classValue(), clsvalue))
            )
                throw new Exception("The Id value and/or class value is not unique!");
        }

        m_IdValue = idvalue;
        m_ClassValue = clsvalue;
    }

    /**
     * Adds one instance to the end of the set. 
     * Shallow copies instance before it is added. Increases the
     * size of the dataset if it is not large enough. Does not
     * check if the instance is compatible with the dataset.
     *
     * @param instance the instance to be added
     */
    public final void add(Instance instance) {
        Instance inst = (Instance) instance.copy();

        if (!checkInstance(instance))
            throw new IllegalArgumentException(
                    "The Id value and/or class value " + "is not compatible: add failed.");
        else
            m_Instances.add(inst);
    }

    /**
     * Checks if the given instance is compatible with this 
     * Exemplar. 
     *
     * @return true if the instance is compatible with the exemplar 
     */
    public final boolean checkInstance(Instance instance) {
        if (!m_Instances.checkInstance(instance))
            return false;

        if ((!Utils.eq(instance.value(m_IdIndex), m_IdValue))
        //|| (!Utils.eq(instance.classValue(), m_ClassValue))
        )
            return false;

        return true;
    }

    /**
     * Returns the class attribute.
     *
     * @return the class attribute
     * @exception UnassignedClassException if the class is not set
     */
    public final Attribute classAttribute() {

        if (m_ClassIndex < 0) {
            throw new UnassignedClassException("Class index is negative (not set)!");
        }
        return m_Instances.attribute(m_ClassIndex);
    }

    /**
     * Returns the class attribute's index. Returns negative number
     * if it's undefined.
     *
     * @return the class index as an integer
     */
    public final int classIndex() {

        return m_ClassIndex;
    }

    /**
     * Returns the class value of this exemplar.
     *
     * @return the class value
     * @exception UnassignedClassException if the class is not set
     */
    public final double classValue() {
        if (m_ClassIndex < 0) {
            throw new UnassignedClassException("Class index is negative (not set)!");
        }
        return m_ClassValue;
    }

    /**
     * Sets the class value of the exemplar
     *
     *@param cv the new class value
     */
    public void setClassValue(double cv) {
        m_ClassValue = cv;
    }

    /**
     * Compactifies the set of instances in this exemplar
     */
    public final void compactify() {

        m_Instances.compactify();
    }

    /**
     * Deletes an attribute at the given position 
     * (0 to numAttributes() - 1). A deep copy of the attribute
     * information is performed before the attribute is deleted.
     *
     * @param pos the attribute's position
     * @exception IllegalArgumentException if the given index is out of range or the
     * class attribute is being deleted
     */
    public void deleteAttributeAt(int position) throws IllegalArgumentException {
        if (m_ClassIndex > position)
            m_ClassIndex--;
        if (m_IdIndex > position)
            m_IdIndex--;

        m_Instances.deleteAttributeAt(position);
    }

    /**
     * Returns an enumeration of all the attributes.
     *
     * @return enumeration of all the attributes.
     */
    public Enumeration enumerateAttributes() {
        FastVector vector = new FastVector(0);
        for (int i = 0; i < m_Instances.numAttributes(); i++)
            if ((i != m_ClassIndex) && (i != m_IdIndex))
                vector.addElement((Object) m_Instances.attribute(i));

        return vector.elements();
    }

    /**
     * Returns the dataset in this exemplar
     *
     * @return all the instances in the exemplar
     */
    public Instances getInstances() {
        return m_Instances;
    }

    /**
      * Returns the ID attribute.
      *
      * @return the ID attribute
      */
    public final Attribute idAttribute() {
        return m_Instances.attribute(m_IdIndex);
    }

    /**
     * Returns the ID attribute's index. 
     *
     * @return the ID index as an integer
     */
    public final int idIndex() {
        return m_IdIndex;
    }

    /**
     * Returns the ID attribute's value. 
     *
     * @return the ID value of this exemplar
     */
    public final double idValue() {
        return m_IdValue;
    }

    /**
     * Inserts an attribute at the given position (0 to 
     * numAttributes()) and sets all values to be missing.
     * Shallow copies the attribute before it is inserted, and performs
     * a deep copy of the existing attribute information.
     *
     * @param att the attribute to be inserted
     * @param pos the attribute's position
     * @exception IllegalArgumentException if the given index is out of range
     */
    public void insertAttributeAt(Attribute att, int position) {

        if (m_ClassIndex >= position)
            m_ClassIndex++;
        if (m_IdIndex >= position)
            m_IdIndex++;
        m_Instances.insertAttributeAt(att, position);
    }

    /**
     * Returns the mean (mode) for all attributes (except ID and class) as
     * a floating-point value. 
     * Returns 0 if the attribute is neither nominal nor 
     * numeric. If all values are missing it returns zero.
     *
     * @return the mean or the mode of all attributes
     */
    public final double[] meanOrMode() {
        int numAttr = m_Instances.numAttributes() - 2; // Excluding ID and class
        double[] mean = new double[numAttr];
        int j = 0;

        for (int i = 0; i < (numAttr + 2); i++) {
            if ((i != m_IdIndex) && (i != m_ClassIndex)) {
                if (Utils.gr(m_Instances.sumOfWeights(), 0.0) && !isAllMissing(i))
                    mean[j] = m_Instances.meanOrMode(i);
                else
                    mean[j] = Double.NaN;
                j++;
            }
        }

        return mean;
    }

    /**
     * Computes the variances for all numeric attributes.
     * For nominal attribute, the variance is set to -1
     *
     * @return the variances for all numeric attributes
     */
    public final double[] variance() {
        int numAttr = m_Instances.numAttributes() - 2; // Excluding ID and class
        double[] var = new double[numAttr];
        int j = 0;
        for (int i = 0; i < (numAttr + 2); i++) {
            if ((i != m_IdIndex) && (i != m_ClassIndex)) {
                if (m_Instances.attribute(i).isNumeric())
                    var[j] = m_Instances.variance(i);
                else
                    var[j] = -1;

                j++;
            }
        }

        return var;
    }

    /**
     * Returns the number of ID labels.
     *
     * @return the number of ID labels.
     */
    public final int numIds() {
        return idAttribute().numValues();
    }

    /**
     * Sets the weight of the exemplar.
     *
     * @param weight the weight
     */
    public final void setWeight(double weight) {
        m_Weight = weight;
    }

    /**
     * Returns the exemplar as a string. Strings
     * are quoted if they contain whitespace characters, or if they
     * are a question mark.
     *
     * @return the exemplar as a string
     */
    public final String toString() {

        StringBuffer text = new StringBuffer();

        Attribute id = idAttribute();
        Attribute cl = classAttribute();
        text.append("@Exemplar: \nID: " + id.name() + " = " + id.value((int) m_IdValue) + "\nClass: " + cl.name()
                + " = " + cl.value((int) m_ClassValue) + "\n\n");

        for (int i = 0; i < m_Instances.numAttributes(); i++) {
            text.append(m_Instances.attribute(i));
            if (i == m_IdIndex)
                text.append(" (ID Attribute)");
            else if (i == m_ClassIndex)
                text.append(" (Class Attribute)");

            text.append("\n");
        }

        text.append("\n@data\n");
        for (int i = 0; i < m_Instances.numInstances(); i++) {
            text.append(m_Instances.instance(i) + "\n");
        }
        return text.toString();
    }

    /**
     * Returns the exemplar's weight.
     *
     * @return the exemplar's weight as a double
     */
    public final double weight() {
        return m_Weight;
    }

    /**
     * Check whether the values of the specified attribute in this exemplar
     * are all missing values
     *
     * @param attIndex the specified attribute index
     * @return whether values are all missing
     */
    public final boolean isAllMissing(int attIndex) {
        for (int i = 0; i < m_Instances.numInstances(); i++)
            if (!m_Instances.instance(i).isMissing(attIndex))
                return false;
        return true;
    }

    /**
     * Main method for testing this class -- just prints out a set
     * of Exemplars.  Assume the ID index is 0.
     *
     * @param argv should contain one element: the name of an ARFF file
     */
    public static void main(String[] args) {

        try {
            Reader r = null;
            if (args.length > 1) {
                throw (new Exception("Usage: Instances <filename>"));
            } else if (args.length == 0) {
                r = new BufferedReader(new InputStreamReader(System.in));
            } else {
                r = new BufferedReader(new FileReader(args[0]));
            }
            Instances i = new Instances(r);
            i.setClassIndex(i.numAttributes() - 1);

            Attribute id = i.attribute(0);
            if (!id.isNominal())
                throw new Exception("The first attribute is not nominal");

            Exemplar[] egs = new Exemplar[id.numValues()];
            for (int j = 0; j < egs.length; j++)
                egs[j] = null;

            for (int j = 0; j < i.numInstances(); j++) {
                Instance ins = i.instance(j);
                int idv = (int) ins.value(0);
                if (egs[idv] == null)
                    egs[idv] = new Exemplar(ins, 0);
                else
                    egs[idv].add(ins);
            }

            for (int j = 0; j < egs.length; j++)
                System.out.println(egs[j].toString());
        } catch (Exception ex) {
            System.err.println(ex.getMessage());
        }
    }
}