tclass.clusteralg.EMClusterer.java Source code

Java tutorial

Introduction

Here is the source code for tclass.clusteralg.EMClusterer.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
  * This is an example of a clustering algorithm, in other words, 
  * what we expect our clustering algorithms to be able to do. 
  *
  * 
  * @author Waleed Kadous
  * @version $Id: EMClusterer.java,v 1.1.1.1 2002/06/28 07:36:16 waleed Exp $
  */

package tclass.clusteralg;

import tclass.ClassStreamEventsVecI;
import tclass.ClusterAlgI;
import tclass.ClusterI;
import tclass.ClusterMem;
import tclass.ClusterVecI;
import tclass.DomDesc;
import tclass.EventDescI;
import tclass.EventDescVecI;
import tclass.EventI;
import tclass.EventVecI;
import tclass.InvalidParameterException;
import tclass.Param;
import tclass.ParamVec;
import tclass.StreamEventsI;
import tclass.StreamI;
import tclass.WekaBridge;
import weka.core.Instance;
import weka.core.Instances;

public class EMClusterer implements ClusterAlgI {

    String baseName = "em";
    String description = "Implements an EM clustering algorithm by using WEKA to do everything";
    EventDescVecI edvi = null;
    DomDesc domDesc = null;
    private int pepIndex = 0;
    private boolean ignoreClasses = true;
    private boolean ignoreTime = false;
    private boolean autoNumClusters = true;
    int numClusters = 0;

    /**
     * Name of this clustering algorithm. 
     */

    public String name() {
        return baseName;
    }

    /**
     * Clone the current object. 
     *
     */

    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Can't happen, or so the java programming book says
            throw new InternalError(e.toString());
        }
    }

    public void setDomDesc(DomDesc dd) {
        domDesc = dd;
    }

    /**
     * Set the description of the incoming Class Stream Events Vector
     * Note that we need this first ... for the parsing of values,
     * before we do any actual processing. I expect that the
     * ClusterAlgMgr should have a copy of it and passes it through in
     * the constructor, pretty much like the Domain description for the 
     * GlobalExtrMgr
     */

    public void setEventDescVec(EventDescVecI events) {
        edvi = events;
    }

    /** 
    * Provides a description of the clustering algorithm.
        
    * This description explains what the basic idea of the clustering
    * algorihtm is (i.e. the sort of shapes it tried to find). It
    * should also explain any potential configuration options that
    * may be used to configure the object, using the configure
    * option.
    * 
    * @return The description of this class.  
    */

    public String description() {
        return description;
    }

    /** 
    *
    * Describes any parameters used by this global extractor,
    * to suit a particular domain. 
    *
    * @return A vector of parameters. 
    */
    public ParamVec getParamList() {
        ParamVec pv = new ParamVec();
        pv.add(new Param("metafeature", "The name of the metafeature to apply this to", "First"));
        pv.add(new Param("numclusters", "Number of clusters. Possible values: auto or a number", "auto"));
        pv.add(new Param("ignoreclass", "Completely ignore class information. Can be true or false", "false"));
        pv.add(new Param("ignoretime",
                "Ignore time (assuming the first attribute of any metafeature is time).. Can be true or false",
                "false"));
        return pv;
    }

    public void setParam(String p, String v) throws InvalidParameterException {
        if (p.equals("metafeature")) {
            // So they want to use a particular metafeature. 
            //Try to find the metafeature. 
            pepIndex = edvi.elIndex(v);
            if (pepIndex == -1) {
                throw new InvalidParameterException(p, v, "Unknown metafeature " + v);
            }
        } else if (p.equals("numclusters")) {
            if (v.equals("auto")) {
                autoNumClusters = true;
            } else {
                try {
                    numClusters = Integer.parseInt(v);
                } catch (NumberFormatException nfe) {
                    throw new InvalidParameterException(p, v, "Could not understand number of exceptions.");

                }
            }
        } else if (p.equals("ignoretime")) {
            if (v.equals("true")) {
                ignoreTime = true;
            } else if (v.equals("false")) {
                ignoreTime = false;
            } else {
                throw new InvalidParameterException(p, v, "Must be either true or false");
            }
        } else if (p.equals("ignoreclass")) {
            if (v.equals("true")) {
                ignoreClasses = true;
            } else if (v.equals("false")) {
                ignoreClasses = false;
            } else {
                throw new InvalidParameterException(p, v, "Must be either true or false");
            }

        } else {
            throw new InvalidParameterException(p, v, "Unknown parameter " + p);
        }
    }

    public ClusterVecI cluster(ClassStreamEventsVecI csevi) {
        try {
            // System.out.println("Ok, here's the stuff ..."); 
            // System.out.println(csevi); 
            // System.out.println(edvi);
            // System.out.println("PI = " + pepIndex); 
            // System.out.println("IgnoreTime = " + ignoreTime); 
            Instances stuff = WekaBridge.makeInstances(csevi, domDesc.getClassDescVec(), edvi, pepIndex,
                    ignoreClasses, ignoreTime);
            EM myClusterer = new EM();
            // myClusterer.setDebug(true);
            if (stuff.numInstances() > 10) {
                myClusterer.setNumClusters(-1);
                myClusterer.setNumCVs(10);
            } else {
                myClusterer.setNumClusters(2);
            }
            myClusterer.buildClusterer(stuff);
            return new EMClusterVec(myClusterer, pepIndex, edvi.elAt(pepIndex), stuff, ignoreTime);
        } catch (Exception e) {
            System.err.println("AAAARGH!! Clustering failed.");
            e.printStackTrace();
        }
        return null;
    }

}

class EMClusterVec implements ClusterVecI {
    EM em;
    int pepIndex;
    boolean ignoreTime;
    EventDescI edi;
    Instances stuff;
    EMCluster[] myClusters;

    EMClusterVec(EM em, int pepIndex, EventDescI edi, Instances stuff, boolean ignoreTime) {
        this.em = em;
        this.pepIndex = pepIndex;
        this.edi = edi;
        this.stuff = stuff;
        myClusters = new EMCluster[size()];
        for (int i = 0; i < myClusters.length; i++) {
            myClusters[i] = new EMCluster(this, pepIndex, i, edi, ignoreTime);
        }
        this.ignoreTime = ignoreTime;
    }

    public float prob(Instance inst, int cluster) {
        try {
            inst.setDataset(stuff);
            float retval = (float) em.probInstanceInCluster(inst, cluster);
            return retval;
        } catch (Exception e) {
            System.err.println("AAAARGH!!! Could not get distribution f for instance!");
            e.printStackTrace();
        }
        return 0;
    }

    public ClusterMem findBestLabel(EventI ev) {
        // System.out.println("Trying to find best label for " + ev); 

        Instance inst = WekaBridge.makeInstance(ev, edi, ignoreTime);
        inst.setDataset(stuff);
        try {
            int cluster = em.clusterInstance(inst);
            float conf = (float) em.probInstanceInCluster(inst, cluster);
            // if(conf > 1){
            //    System.out.println("WTF?? Label is " + (new ClusterMem(elAt(cluster), conf)).toString() );               //  }
            return new ClusterMem(elAt(cluster), conf);

        } catch (Exception e) {
            System.out.println("Label finding failed. AAAAAARGH!!!");
            e.printStackTrace();
            return null;
        }
    }

    public String getClusterDesc(int cluster) {
        return em.describeCluster(cluster);
    }

    public int size() {
        try {
            return em.numberOfClusters();
        } catch (Exception e) {
            System.err.println("AAAARGH!!! Could not get number of instances!");
        }
        return 0;
    }

    public ClusterI elAt(int i) {
        return myClusters[i];

    }

    /**
     * Clone the current object. 
     *
     */

    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Can't happen, or so the java programming book says
            throw new InternalError(e.toString());
        }
    }

}

class EMCluster implements ClusterI {
    EMClusterVec parent;
    int pepIndex;
    int clusterIndex;
    boolean ignoreTime;
    EventDescI edi;
    String myName = "unnamed";

    public String getName() {
        return myName;
    }

    public void setName(String newname) {
        // System.out.println("Name set to  " + newname); 
        myName = newname;
    }

    public EMCluster(EMClusterVec parent, int pepIndex, int clusterIndex, EventDescI edi, boolean ignoreTime) {
        this.parent = parent;
        this.pepIndex = pepIndex;
        this.clusterIndex = clusterIndex;
        this.edi = edi;
        this.ignoreTime = ignoreTime;
    }

    public String getDescription() {
        return parent.getClusterDesc(clusterIndex);
    }

    @Override
    public String toString() {
        return myName + " " + parent.getClusterDesc(clusterIndex);
    }

    public float findMatch(StreamI si, StreamEventsI sei) {
        EventVecI events = sei.getEvents(pepIndex);
        int numEvents = events.size();
        float bestEventDistance = Float.MAX_VALUE;
        for (int i = 0; i < numEvents; i++) {
            Instance inst = WekaBridge.makeInstance(events.elAt(i), edi, ignoreTime);
            float dist = parent.prob(inst, clusterIndex);
            if (dist < bestEventDistance) {
                bestEventDistance = dist;
            }
        }
        return bestEventDistance;
    }

    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            // Can't happen, or so the java programming book says
            throw new InternalError(e.toString());
        }
    }

}