List of usage examples for weka.core NormalizableDistance setDontNormalize
public void setDontNormalize(boolean dontNormalize)
From source file:org.iobserve.analysis.behavior.karlsruhe.XMeansClustering.java
License:Apache License
/** * * @param instances// w w w . ja v a 2 s. c om * data to cluster in Weka format * @param numberOfUserGroupsFromInputUsageModel * is the input number of clusters * @param varianceOfUserGroups * enables the creation of a minimum and maximum number of clusters * @param seed * states a random determination of the initial centroids * @return the clustering results that contain the number of cluster and the assignments */ public ClusteringResults clusterSessionsWithXMeans(final Instances instances, final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) { ClusteringResults xMeansClusteringResults = null; try { final XMeans xmeans = new XMeans(); xmeans.setSeed(seed); final NormalizableDistance manhattenDistance = new ManhattanDistance(); manhattenDistance.setDontNormalize(false); manhattenDistance.setInstances(instances); xmeans.setDistanceF(manhattenDistance); int[] clustersize = null; final int[] assignments = new int[instances.numInstances()]; // Determines the range of clusters // The X-Means clustering algorithm determines the best fitting number of clusters // within this range by itself int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups; int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups; if (numberOfClustersMax < 2) { numberOfClustersMax = 1; numberOfClustersMin = 1; } else { if (numberOfClustersMin < 2) { numberOfClustersMin = 2; } } xmeans.setMinNumClusters(numberOfClustersMin); xmeans.setMaxNumClusters(numberOfClustersMax); xmeans.buildClusterer(instances); clustersize = new int[xmeans.getClusterCenters().numInstances()]; for (int s = 0; s < instances.numInstances(); s++) { assignments[s] = xmeans.clusterInstance(instances.instance(s)); clustersize[xmeans.clusterInstance(instances.instance(s))]++; } final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances, assignments); clusteringMetrics.calculateSimilarityMetrics(); xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(), assignments, clusteringMetrics); } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering // Exception e.printStackTrace(); } return xMeansClusteringResults; }
From source file:org.iobserve.analysis.userbehavior.XMeansClustering.java
License:Apache License
/** * * @param instances//from w ww.j a va2s . c om * data to cluster in Weka format * @param numberOfUserGroupsFromInputUsageModel * is the input number of clusters * @param varianceOfUserGroups * enables the creation of a minimum and maximum number of clusters * @param seed * states a random determination of the initial centroids * @return the clustering results that contain the number of cluster and the assignments */ public ClusteringResults clusterSessionsWithXMeans(final Instances instances, final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) { ClusteringResults xMeansClusteringResults = null; try { final XMeans xmeans = new XMeans(); xmeans.setSeed(seed); final NormalizableDistance manhattenDistance = new ManhattanDistance(); manhattenDistance.setDontNormalize(false); manhattenDistance.setInstances(instances); xmeans.setDistanceF(manhattenDistance); int[] clustersize = null; final int[] assignments = new int[instances.numInstances()]; // Determines the range of clusters // The X-Means clustering algorithm determines the best fitting number of clusters // within this range by itself int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups; int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups; if (numberOfClustersMax < 2) { numberOfClustersMax = 1; numberOfClustersMin = 1; } else { if (numberOfClustersMin < 2) { numberOfClustersMin = 2; } } xmeans.setMinNumClusters(numberOfClustersMin); xmeans.setMaxNumClusters(numberOfClustersMax); xmeans.buildClusterer(instances); clustersize = new int[xmeans.getClusterCenters().numInstances()]; for (int s = 0; s < instances.numInstances(); s++) { assignments[s] = xmeans.clusterInstance(instances.instance(s)); clustersize[xmeans.clusterInstance(instances.instance(s))]++; } final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances, assignments); clusteringMetrics.calculateSimilarityMetrics(); xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(), assignments, clusteringMetrics); } catch (final Exception e) { // NOCS due to broken xmeans implementation triggering // Exception e.printStackTrace(); } return xMeansClusteringResults; }