Example usage for weka.core NormalizableDistance setDontNormalize

List of usage examples for weka.core NormalizableDistance setDontNormalize

Introduction

In this page you can find the example usage for weka.core NormalizableDistance setDontNormalize.

Prototype

public void setDontNormalize(boolean dontNormalize) 

Source Link

Document

Sets whether if the attribute values are to be normalized in distance calculation.

Usage

From source file:org.iobserve.analysis.behavior.karlsruhe.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances// w w w  . ja v a  2 s.  c om
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}

From source file:org.iobserve.analysis.userbehavior.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances//from w ww.j  a va2s  .  c om
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}