List of usage examples for the weka.core.MinkowskiDistance constructor MinkowskiDistance(Instances)
public MinkowskiDistance(Instances data)
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
/** * //from w w w . j a v a 2s . c o m */ public static CollapsedInstances distanceBetweenInstances(final Instances instances, final MDSDistancesEnum distEnum, final int maxInstances, final boolean ignoreClassInDistance) throws Exception { KmeansResult mapCentroids = null; final NormalizableDistance usedDist; if (distEnum.equals(MDSDistancesEnum.EUCLIDEAN)) { usedDist = new EuclideanDistance(instances); //usedDist.setDontNormalize(true); //usedDist.setAttributeIndices("1"); //usedDist.setInvertSelection(true); } else if (distEnum.equals(MDSDistancesEnum.MANHATTAN)) usedDist = new ManhattanDistance(instances); else if (distEnum.equals(MDSDistancesEnum.MINKOWSKI)) { usedDist = new MinkowskiDistance(instances); final String[] parameters = MDSDistancesEnum.MINKOWSKI.getParameters(); //Change order double order = Double.valueOf(parameters[0]).doubleValue(); ((MinkowskiDistance) usedDist).setOrder(order); } else if (distEnum.equals(MDSDistancesEnum.CHEBYSHEV)) usedDist = new ChebyshevDistance(instances); //else if (distEnum.equals(MDSDistancesEnum.DT)) usedDist=new DTDistance(instances); else throw new IllegalStateException(); final int numInstances = instances.numInstances(); final boolean collapsed = (numInstances > maxInstances) && (distEnum.equals(MDSDistancesEnum.EUCLIDEAN) || distEnum.equals(MDSDistancesEnum.MANHATTAN)); SimpleMatrix distances; //Ignore class in distance if (ignoreClassInDistance && instances.classIndex() != -1) { usedDist.setAttributeIndices("" + (instances.classIndex() + 1)); usedDist.setInvertSelection(true); } int numCollapsedInstances = numInstances; if (collapsed) { //Compute distance with centroids using K-means with K=MAX_INSTANCES mapCentroids = getSimplifiedInstances(instances, usedDist, maxInstances); final List<Instance> centroids = mapCentroids.getCentroids(); numCollapsedInstances = centroids.size(); distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances); for (int i = 0; i < numCollapsedInstances; i++) { for (int 
j = i + 1; j < numCollapsedInstances; j++) { double dist = usedDist.distance(centroids.get(i), centroids.get(j)); distances.set(i, j, dist); distances.set(j, i, dist); } } } else { distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances); for (int i = 0; i < numCollapsedInstances; i++) { for (int j = i + 1; j < numCollapsedInstances; j++) { double dist = usedDist.distance(instances.get(i), instances.get(j)); distances.set(i, j, dist); distances.set(j, i, dist); } } } return new CollapsedInstances(instances, mapCentroids, distances, collapsed); }
From source file:nl.uva.sne.classifiers.Hierarchical.java
@Override public Map<String, String> cluster(String inDir) throws IOException, ParseException { try {/* ww w .j ava2 s . c o m*/ Instances data = ClusterUtils.terms2Instances(inDir, false); // ArffSaver s = new ArffSaver(); // s.setInstances(data); // s.setFile(new File(inDir+"/dataset.arff")); // s.writeBatch(); DistanceFunction df; // SimpleKMeans currently only supports the Euclidean and Manhattan distances. switch (distanceFunction) { case "Minkowski": df = new MinkowskiDistance(data); break; case "Euclidean": df = new EuclideanDistance(data); break; case "Chebyshev": df = new ChebyshevDistance(data); break; case "Manhattan": df = new ManhattanDistance(data); break; default: df = new EuclideanDistance(data); break; } Logger.getLogger(Hierarchical.class.getName()).log(Level.INFO, "Start clusteing"); weka.clusterers.HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); clusterer.setDebug(true); clusterer.setNumClusters(numOfClusters); clusterer.setDistanceFunction(df); clusterer.setDistanceIsBranchLength(true); clusterer.setPrintNewick(false); weka.clusterers.FilteredClusterer fc = new weka.clusterers.FilteredClusterer(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "1"; // we want to ignore the attribute that is in the position '1' Remove remove = new Remove(); // new instance of filter remove.setOptions(options); // set options fc.setFilter(remove); //add filter to remove attributes fc.setClusterer(clusterer); //bind FilteredClusterer to original clusterer fc.buildClusterer(data); // // Print normal // clusterer.setPrintNewick(false); // System.out.println(clusterer.graph()); // // Print Newick // clusterer.setPrintNewick(true); // System.out.println(clusterer.graph()); // // // Let's try to show this clustered data! 
// JFrame mainFrame = new JFrame("Weka Test"); // mainFrame.setSize(600, 400); // mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); // Container content = mainFrame.getContentPane(); // content.setLayout(new GridLayout(1, 1)); // // HierarchyVisualizer visualizer = new HierarchyVisualizer(clusterer.graph()); // content.add(visualizer); // // mainFrame.setVisible(true); return ClusterUtils.bulidClusters(clusterer, data, inDir); } catch (Exception ex) { Logger.getLogger(Hierarchical.class.getName()).log(Level.SEVERE, null, ex); } return null; }