Example usage for weka.core NormalizableDistance setInstances

List of usage examples for weka.core NormalizableDistance setInstances

Introduction

In this page you can find the example usage for weka.core NormalizableDistance setInstances.

Prototype

@Override
public void setInstances(Instances insts) 

Source Link

Document

Sets the instances.

Usage

From source file:org.iobserve.analysis.behavior.karlsruhe.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances// w  w w .ja v a2  s  .c  o  m
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}

From source file:org.iobserve.analysis.userbehavior.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances/*from   ww w . ja v  a2s.c  o  m*/
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}