List of usage examples for the weka.core.EuclideanDistance constructor EuclideanDistance(Instances)
public EuclideanDistance(Instances data)
From source file:agnes.MyAgnes.java
public void buildClusterer(Instances data) { distanceCounter = new EuclideanDistance(data); ArrayList<ArrayList<Instance>> currentClusters = new ArrayList<>(); for (int i = 0; i < data.numInstances(); i++) { currentClusters.add(new ArrayList<>()); currentClusters.get(i).add(data.instance(i)); instanceID.put(data.instance(i), i); }//from w w w . ja v a2 s . c om addNewClusterHierarchy(currentClusters); }
From source file:kmeans.MyKMeans.java
void clusteringInstance() { ArrayList<Integer> tempList = new ArrayList<Integer>(); Instances tempInst = new Instances(dataSource, 0); EuclideanDistance ed = new EuclideanDistance(centroid); int[] pointList = new int[numCluster]; int checkNumberChange = 0; for (int i = 0; i < numCluster; i++) { pointList[i] = i;/* ww w . j a va 2 s. co m*/ } for (int i = 0; i < dataSource.numInstances(); i++) { int clusterNumber = -1; Instance currentInst = dataSource.get(i); try { clusterNumber = ed.closestPoint(currentInst, centroid, pointList); } catch (Exception ex) { System.out.println("************** " + ex.toString()); } int clusterBefore = clusteredInstance.put(i, clusterNumber); if (clusterNumber != clusterBefore) checkNumberChange++; } if (checkNumberChange != 0) finish = false; else finish = true; updateListClusteredInstance(); // printListClusteredInstance(); }
From source file:kmeans_extend.MyKMeans.java
void clusteringInstance() { ArrayList<Integer> tempList = new ArrayList<Integer>(); Instances tempInst = new Instances(dataSource, 0); EuclideanDistance ed = new EuclideanDistance(centroid); int[] pointList = new int[noOfClusters]; int checkNumberChange = 0; for (int i = 0; i < noOfClusters; i++) { pointList[i] = i;/* w w w. j av a 2 s . c om*/ } for (int i = 0; i < dataSource.numInstances(); i++) { int clusterNumber = -1; Instance currentInst = dataSource.get(i); try { clusterNumber = ed.closestPoint(currentInst, centroid, pointList); } catch (Exception ex) { System.out.println("************** " + ex.toString()); } int clusterBefore = clusteredInstance.put(i, clusterNumber); if (clusterNumber != clusterBefore) checkNumberChange++; } if (checkNumberChange != 0) finish = false; else finish = true; updateListClusteredInstance(); // printListClusteredInstance(); }
From source file:kmeans_extend.MyKMeans.java
/**
 * Finds the centroid closest to the given instance using Euclidean distance.
 *
 * @param instance the instance to classify
 * @return the index into {@code centroid} of the nearest centroid, or {@code -1}
 *         if the lookup throws (the exception is printed to standard output)
 */
public int clusterInstance(Instance instance) {
    final EuclideanDistance metric = new EuclideanDistance(centroid);

    // All centroid indices are candidates.
    final int[] candidates = new int[noOfClusters];
    int next = 0;
    while (next < noOfClusters) {
        candidates[next] = next;
        next++;
    }

    try {
        return metric.closestPoint(instance, centroid, candidates);
    } catch (Exception ex) {
        System.out.println("************** " + ex.toString());
        return -1;
    }
}
From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java
License:Open Source License
private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k) throws Exception { final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet); final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet); final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction); skm.buildClusterer(trdataSet);/* w w w. ja va 2 s . c om*/ final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(skm); eval.evaluateClusterer(trdataSet); final int numClusters = eval.getNumClusters(); final List<String> possibleValues = new ArrayList<String>(numClusters); for (int c = 0; c < numClusters; c++) possibleValues.add("cluster_" + c); final double[] clusterAssignments = eval.getClusterAssignments(); final int numAttributes = dataSet.numAttributes(); final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes); for (int j = 0; j < numAttributes; j++) { //System.out.println(clusterAssignments[j]+" "+(int)clusterAssignments[j]); valueForEachFeature.add((int) clusterAssignments[j]); } return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature); }
From source file:lu.lippmann.cdb.datasetview.tasks.AddCentroidDistanceTask.java
License:Open Source License
/** * {@inheritDoc}/*from w ww. j a v a 2 s . co m*/ */ @Override Instances process0(final Instances dataSet) throws Exception { return WekaMachineLearningUtil.buildDataSetWithCentroidDistanceAsNewFeature(dataSet, new EuclideanDistance(dataSet)); }
From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java
License:Open Source License
/**
 * Creates the clusterer over a private copy of the given instances, using a
 * Euclidean distance built on that copy.
 *
 * <p>Any exception raised during initialisation is printed and otherwise ignored,
 * in which case the fields assigned after the failure point keep their defaults.
 *
 * @param pinstances the instances to cluster; copied, so the caller's object is not retained
 * @param maxClusterSize value stored as {@code maxClusters}
 */
public KmeansImproved(final Instances pinstances, final int maxClusterSize) {
    try {
        this.instances = new Instances(pinstances);
        this.distance = new EuclideanDistance(this.instances);
        this.maxClusters = maxClusterSize;
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
/** * /*from w ww . j a v a 2 s. c om*/ */ public static CollapsedInstances distanceBetweenInstances(final Instances instances, final MDSDistancesEnum distEnum, final int maxInstances, final boolean ignoreClassInDistance) throws Exception { KmeansResult mapCentroids = null; final NormalizableDistance usedDist; if (distEnum.equals(MDSDistancesEnum.EUCLIDEAN)) { usedDist = new EuclideanDistance(instances); //usedDist.setDontNormalize(true); //usedDist.setAttributeIndices("1"); //usedDist.setInvertSelection(true); } else if (distEnum.equals(MDSDistancesEnum.MANHATTAN)) usedDist = new ManhattanDistance(instances); else if (distEnum.equals(MDSDistancesEnum.MINKOWSKI)) { usedDist = new MinkowskiDistance(instances); final String[] parameters = MDSDistancesEnum.MINKOWSKI.getParameters(); //Change order double order = Double.valueOf(parameters[0]).doubleValue(); ((MinkowskiDistance) usedDist).setOrder(order); } else if (distEnum.equals(MDSDistancesEnum.CHEBYSHEV)) usedDist = new ChebyshevDistance(instances); //else if (distEnum.equals(MDSDistancesEnum.DT)) usedDist=new DTDistance(instances); else throw new IllegalStateException(); final int numInstances = instances.numInstances(); final boolean collapsed = (numInstances > maxInstances) && (distEnum.equals(MDSDistancesEnum.EUCLIDEAN) || distEnum.equals(MDSDistancesEnum.MANHATTAN)); SimpleMatrix distances; //Ignore class in distance if (ignoreClassInDistance && instances.classIndex() != -1) { usedDist.setAttributeIndices("" + (instances.classIndex() + 1)); usedDist.setInvertSelection(true); } int numCollapsedInstances = numInstances; if (collapsed) { //Compute distance with centroids using K-means with K=MAX_INSTANCES mapCentroids = getSimplifiedInstances(instances, usedDist, maxInstances); final List<Instance> centroids = mapCentroids.getCentroids(); numCollapsedInstances = centroids.size(); distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances); for (int i = 0; i < numCollapsedInstances; i++) { for (int 
j = i + 1; j < numCollapsedInstances; j++) { double dist = usedDist.distance(centroids.get(i), centroids.get(j)); distances.set(i, j, dist); distances.set(j, i, dist); } } } else { distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances); for (int i = 0; i < numCollapsedInstances; i++) { for (int j = i + 1; j < numCollapsedInstances; j++) { double dist = usedDist.distance(instances.get(i), instances.get(j)); distances.set(i, j, dist); distances.set(j, i, dist); } } } return new CollapsedInstances(instances, mapCentroids, distances, collapsed); }
From source file:nl.uva.sne.classifiers.Hierarchical.java
@Override public Map<String, String> cluster(String inDir) throws IOException, ParseException { try {//from www . ja v a 2 s . c o m Instances data = ClusterUtils.terms2Instances(inDir, false); // ArffSaver s = new ArffSaver(); // s.setInstances(data); // s.setFile(new File(inDir+"/dataset.arff")); // s.writeBatch(); DistanceFunction df; // SimpleKMeans currently only supports the Euclidean and Manhattan distances. switch (distanceFunction) { case "Minkowski": df = new MinkowskiDistance(data); break; case "Euclidean": df = new EuclideanDistance(data); break; case "Chebyshev": df = new ChebyshevDistance(data); break; case "Manhattan": df = new ManhattanDistance(data); break; default: df = new EuclideanDistance(data); break; } Logger.getLogger(Hierarchical.class.getName()).log(Level.INFO, "Start clusteing"); weka.clusterers.HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); clusterer.setDebug(true); clusterer.setNumClusters(numOfClusters); clusterer.setDistanceFunction(df); clusterer.setDistanceIsBranchLength(true); clusterer.setPrintNewick(false); weka.clusterers.FilteredClusterer fc = new weka.clusterers.FilteredClusterer(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "1"; // we want to ignore the attribute that is in the position '1' Remove remove = new Remove(); // new instance of filter remove.setOptions(options); // set options fc.setFilter(remove); //add filter to remove attributes fc.setClusterer(clusterer); //bind FilteredClusterer to original clusterer fc.buildClusterer(data); // // Print normal // clusterer.setPrintNewick(false); // System.out.println(clusterer.graph()); // // Print Newick // clusterer.setPrintNewick(true); // System.out.println(clusterer.graph()); // // // Let's try to show this clustered data! 
// JFrame mainFrame = new JFrame("Weka Test"); // mainFrame.setSize(600, 400); // mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); // Container content = mainFrame.getContentPane(); // content.setLayout(new GridLayout(1, 1)); // // HierarchyVisualizer visualizer = new HierarchyVisualizer(clusterer.graph()); // content.add(visualizer); // // mainFrame.setVisible(true); return ClusterUtils.bulidClusters(clusterer, data, inDir); } catch (Exception ex) { Logger.getLogger(Hierarchical.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:nl.uva.sne.classifiers.Kmeans.java
@Override public Map<String, String> cluster(String inDir) throws IOException, ParseException { try {/*from ww w . ja v a2 s.c om*/ Instances data = ClusterUtils.terms2Instances(inDir, false); DistanceFunction df; // SimpleKMeans currently only supports the Euclidean and Manhattan distances. switch (distanceFunction) { case "Euclidean": df = new EuclideanDistance(data); break; case "Manhattan": df = new ManhattanDistance(data); break; default: df = new EuclideanDistance(data); break; } SimpleKMeans clusterer = new SimpleKMeans(); Random rand = new Random(System.currentTimeMillis()); int seed = rand.nextInt((Integer.MAX_VALUE - 1000000) + 1) + 1000000; clusterer.setSeed(seed); clusterer.setMaxIterations(1000000000); Logger.getLogger(Kmeans.class.getName()).log(Level.INFO, "Start clusteing"); clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(numOfClusters); clusterer.setDistanceFunction(df); return ClusterUtils.bulidClusters(clusterer, data, inDir); } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } return null; }