Example usage for weka.core EuclideanDistance EuclideanDistance

List of usage examples for weka.core EuclideanDistance EuclideanDistance

Introduction

On this page you can find example usages of the weka.core.EuclideanDistance constructor EuclideanDistance(Instances).

Prototype

public EuclideanDistance(Instances data) 

Source Link

Document

Constructs a Euclidean distance object and automatically initializes the ranges.

Usage

From source file:agnes.MyAgnes.java

/**
 * Initialises the clusterer by placing every instance of {@code data} in a cluster of its own.
 *
 * <p>Sets {@code distanceCounter} to a Euclidean distance built over {@code data}, records
 * each instance's index in {@code instanceID}, and hands the list of single-instance clusters
 * to {@code addNewClusterHierarchy}.
 *
 * @param data the instances to cluster
 */
public void buildClusterer(Instances data) {
    distanceCounter = new EuclideanDistance(data);

    ArrayList<ArrayList<Instance>> singletons = new ArrayList<>();
    for (int idx = 0; idx < data.numInstances(); idx++) {
        // One fresh cluster per instance, stored at the same position as the instance.
        ArrayList<Instance> cluster = new ArrayList<>();
        singletons.add(cluster);
        cluster.add(data.instance(idx));
        instanceID.put(data.instance(idx), idx);
    }

    addNewClusterHierarchy(singletons);
}

From source file:kmeans.MyKMeans.java

/**
 * Assigns every instance of {@code dataSource} to a centroid and records whether any
 * assignment changed.
 *
 * <p>For each instance the index returned by {@code EuclideanDistance.closestPoint} over all
 * {@code numCluster} centroids is stored in {@code clusteredInstance}. If {@code closestPoint}
 * throws, the exception is printed to standard output and {@code -1} is stored for that
 * instance. {@code finish} is set to {@code true} only when no stored assignment changed, and
 * {@code updateListClusteredInstance()} is called at the end.
 */
void clusteringInstance() {
    EuclideanDistance ed = new EuclideanDistance(centroid);

    // Offer every centroid index (0..numCluster-1) as a candidate to closestPoint.
    int[] pointList = new int[numCluster];
    for (int i = 0; i < numCluster; i++) {
        pointList[i] = i;
    }

    int checkNumberChange = 0;
    for (int i = 0; i < dataSource.numInstances(); i++) {
        int clusterNumber = -1;
        Instance currentInst = dataSource.get(i);
        try {
            clusterNumber = ed.closestPoint(currentInst, centroid, pointList);
        } catch (Exception ex) {
            System.out.println("************** " + ex.toString());
        }

        // Keep the previous value boxed: assuming clusteredInstance is a java.util.Map
        // (declaration not visible here), put() returns null when the key had no mapping,
        // and unboxing that straight into an int would throw a NullPointerException.
        Integer clusterBefore = clusteredInstance.put(i, clusterNumber);
        if (clusterBefore == null || clusterBefore != clusterNumber) {
            checkNumberChange++;
        }
    }

    finish = (checkNumberChange == 0);
    updateListClusteredInstance();
}

From source file:kmeans_extend.MyKMeans.java

/**
 * Assigns every instance of {@code dataSource} to a centroid and records whether any
 * assignment changed.
 *
 * <p>For each instance the index returned by {@code EuclideanDistance.closestPoint} over all
 * {@code noOfClusters} centroids is stored in {@code clusteredInstance}. If
 * {@code closestPoint} throws, the exception is printed to standard output and {@code -1} is
 * stored for that instance. {@code finish} is set to {@code true} only when no stored
 * assignment changed, and {@code updateListClusteredInstance()} is called at the end.
 */
void clusteringInstance() {
    EuclideanDistance ed = new EuclideanDistance(centroid);

    // Offer every centroid index (0..noOfClusters-1) as a candidate to closestPoint.
    int[] pointList = new int[noOfClusters];
    for (int i = 0; i < noOfClusters; i++) {
        pointList[i] = i;
    }

    int checkNumberChange = 0;
    for (int i = 0; i < dataSource.numInstances(); i++) {
        int clusterNumber = -1;
        Instance currentInst = dataSource.get(i);
        try {
            clusterNumber = ed.closestPoint(currentInst, centroid, pointList);
        } catch (Exception ex) {
            System.out.println("************** " + ex.toString());
        }

        // Keep the previous value boxed: assuming clusteredInstance is a java.util.Map
        // (declaration not visible here), put() returns null when the key had no mapping,
        // and unboxing that straight into an int would throw a NullPointerException.
        Integer clusterBefore = clusteredInstance.put(i, clusterNumber);
        if (clusterBefore == null || clusterBefore != clusterNumber) {
            checkNumberChange++;
        }
    }

    finish = (checkNumberChange == 0);
    updateListClusteredInstance();
}

From source file:kmeans_extend.MyKMeans.java

/**
 * Looks up the cluster for a single instance.
 *
 * <p>All {@code noOfClusters} indices into {@code centroid} are offered as candidates to
 * {@code EuclideanDistance.closestPoint}. If that call throws, the exception is printed to
 * standard output and {@code -1} is returned instead.
 *
 * @param instance the instance to assign
 * @return the index reported by {@code closestPoint}, or {@code -1} if the lookup failed
 */
public int clusterInstance(Instance instance) {
    EuclideanDistance metric = new EuclideanDistance(centroid);

    int[] candidates = new int[noOfClusters];
    for (int c = 0; c < candidates.length; c++) {
        candidates[c] = c;
    }

    try {
        return metric.closestPoint(instance, centroid, candidates);
    } catch (Exception ex) {
        System.out.println("************** " + ex.toString());
        return -1;
    }
}

From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java

License:Open Source License

/**
 * Clusters the transposed dataset with k-means and builds a derived dataset from the
 * resulting per-feature cluster assignment.
 *
 * <p>The dataset returned by {@code WekaDataProcessingUtil.buildTransposedDataSet} is
 * clustered with a {@code SimpleKMeans} using a Euclidean distance (presumably one transposed
 * instance per original attribute; confirm against that helper). One {@code "cluster_<n>"}
 * label is created per cluster found by the evaluation, and the first
 * {@code dataSet.numAttributes()} cluster assignments, truncated to {@code int}, are passed
 * on to {@code buildDerivatedDataset}.
 *
 * @param dataSet the original dataset
 * @param k the k handed to {@code buildSimpleKMeansClustererWithK}
 * @return the dataset produced by {@code buildDerivatedDataset}
 * @throws Exception if transposing, clustering or evaluation fails
 */
private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k)
        throws Exception {
    final Instances transposed = WekaDataProcessingUtil.buildTransposedDataSet(dataSet);

    final EuclideanDistance euclidean = new EuclideanDistance(transposed);
    final SimpleKMeans kMeans = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, euclidean);
    kMeans.buildClusterer(transposed);

    final ClusterEvaluation evaluation = new ClusterEvaluation();
    evaluation.setClusterer(kMeans);
    evaluation.evaluateClusterer(transposed);

    // One nominal label per cluster reported by the evaluation.
    final int clusterCount = evaluation.getNumClusters();
    final List<String> clusterLabels = new ArrayList<String>(clusterCount);
    for (int cluster = 0; cluster < clusterCount; cluster++) {
        clusterLabels.add("cluster_" + cluster);
    }

    // Assignments come back as doubles; each is truncated to an int cluster index.
    final double[] assignments = evaluation.getClusterAssignments();
    final int featureCount = dataSet.numAttributes();
    final List<Integer> clusterOfFeature = new ArrayList<Integer>(featureCount);
    for (int feature = 0; feature < featureCount; feature++) {
        final int assigned = (int) assignments[feature];
        clusterOfFeature.add(assigned);
    }

    return buildDerivatedDataset(dataSet, clusterLabels, clusterOfFeature);
}

From source file:lu.lippmann.cdb.datasetview.tasks.AddCentroidDistanceTask.java

License:Open Source License

/**
 * {@inheritDoc}/*from w ww.  j  a v  a 2  s .  co m*/
 */
@Override
Instances process0(final Instances dataSet) throws Exception {
    return WekaMachineLearningUtil.buildDataSetWithCentroidDistanceAsNewFeature(dataSet,
            new EuclideanDistance(dataSet));
}

From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java

License:Open Source License

/**
 * /*from   www . j  a  v a2 s  .c  o m*/
 * @param instances
 */
public KmeansImproved(final Instances pinstances, final int maxClusterSize) {
    try {
        this.instances = new Instances(pinstances);
        this.distance = new EuclideanDistance(this.instances);
        this.maxClusters = maxClusterSize;
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java

License:Open Source License

/**
 * /*from  w ww . j a  v  a 2  s.  c  om*/
 */
public static CollapsedInstances distanceBetweenInstances(final Instances instances,
        final MDSDistancesEnum distEnum, final int maxInstances, final boolean ignoreClassInDistance)
        throws Exception {
    KmeansResult mapCentroids = null;

    final NormalizableDistance usedDist;
    if (distEnum.equals(MDSDistancesEnum.EUCLIDEAN)) {
        usedDist = new EuclideanDistance(instances);
        //usedDist.setDontNormalize(true);
        //usedDist.setAttributeIndices("1");
        //usedDist.setInvertSelection(true);
    } else if (distEnum.equals(MDSDistancesEnum.MANHATTAN))
        usedDist = new ManhattanDistance(instances);
    else if (distEnum.equals(MDSDistancesEnum.MINKOWSKI)) {
        usedDist = new MinkowskiDistance(instances);
        final String[] parameters = MDSDistancesEnum.MINKOWSKI.getParameters();
        //Change order
        double order = Double.valueOf(parameters[0]).doubleValue();
        ((MinkowskiDistance) usedDist).setOrder(order);
    } else if (distEnum.equals(MDSDistancesEnum.CHEBYSHEV))
        usedDist = new ChebyshevDistance(instances);
    //else if (distEnum.equals(MDSDistancesEnum.DT)) usedDist=new DTDistance(instances);
    else
        throw new IllegalStateException();

    final int numInstances = instances.numInstances();
    final boolean collapsed = (numInstances > maxInstances)
            && (distEnum.equals(MDSDistancesEnum.EUCLIDEAN) || distEnum.equals(MDSDistancesEnum.MANHATTAN));

    SimpleMatrix distances;

    //Ignore class in distance
    if (ignoreClassInDistance && instances.classIndex() != -1) {
        usedDist.setAttributeIndices("" + (instances.classIndex() + 1));
        usedDist.setInvertSelection(true);
    }

    int numCollapsedInstances = numInstances;
    if (collapsed) {
        //Compute distance with centroids using K-means with K=MAX_INSTANCES
        mapCentroids = getSimplifiedInstances(instances, usedDist, maxInstances);

        final List<Instance> centroids = mapCentroids.getCentroids();
        numCollapsedInstances = centroids.size();

        distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances);

        for (int i = 0; i < numCollapsedInstances; i++) {
            for (int j = i + 1; j < numCollapsedInstances; j++) {
                double dist = usedDist.distance(centroids.get(i), centroids.get(j));
                distances.set(i, j, dist);
                distances.set(j, i, dist);
            }
        }
    } else {
        distances = new SimpleMatrix(numCollapsedInstances, numCollapsedInstances);
        for (int i = 0; i < numCollapsedInstances; i++) {
            for (int j = i + 1; j < numCollapsedInstances; j++) {
                double dist = usedDist.distance(instances.get(i), instances.get(j));
                distances.set(i, j, dist);
                distances.set(j, i, dist);
            }
        }
    }
    return new CollapsedInstances(instances, mapCentroids, distances, collapsed);
}

From source file:nl.uva.sne.classifiers.Hierarchical.java

/**
 * Clusters the data loaded from {@code inDir} with Weka's {@code HierarchicalClusterer}.
 *
 * <p>The distance is selected by the {@code distanceFunction} field ("Minkowski",
 * "Chebyshev", "Manhattan", otherwise Euclidean). The clusterer is configured with the options
 * {@code -L COMPLETE}, {@code numOfClusters} clusters and that distance, then built through a
 * {@code FilteredClusterer} whose {@code Remove} filter is given the range {@code "1"}.
 *
 * <p>The whole body runs inside a {@code try}; any exception is logged at {@code SEVERE}
 * level and {@code null} is returned instead of propagating it.
 *
 * @param inDir the directory passed to {@code ClusterUtils.terms2Instances} and
 *        {@code ClusterUtils.bulidClusters}
 * @return the result of {@code ClusterUtils.bulidClusters}, or {@code null} on any failure
 */
@Override
public Map<String, String> cluster(String inDir) throws IOException, ParseException {
    try {
        final Instances data = ClusterUtils.terms2Instances(inDir, false);

        final DistanceFunction metric;
        switch (distanceFunction) {
        case "Minkowski":
            metric = new MinkowskiDistance(data);
            break;
        case "Chebyshev":
            metric = new ChebyshevDistance(data);
            break;
        case "Manhattan":
            metric = new ManhattanDistance(data);
            break;
        case "Euclidean":
        default:
            // Unrecognised names fall back to the Euclidean distance.
            metric = new EuclideanDistance(data);
            break;
        }

        final Logger log = Logger.getLogger(Hierarchical.class.getName());
        log.log(Level.INFO, "Start clusteing");

        final weka.clusterers.HierarchicalClusterer hierarchical = new HierarchicalClusterer();
        hierarchical.setOptions(new String[] { "-L", "COMPLETE" });
        hierarchical.setDebug(true);
        hierarchical.setNumClusters(numOfClusters);
        hierarchical.setDistanceFunction(metric);
        hierarchical.setDistanceIsBranchLength(true);
        hierarchical.setPrintNewick(false);

        // Filter configured with range "1", i.e. the attribute in position 1 is ignored.
        final Remove dropFirstAttribute = new Remove();
        dropFirstAttribute.setOptions(new String[] { "-R", "1" });

        // Build through a FilteredClusterer so the filter is applied before clustering.
        final weka.clusterers.FilteredClusterer filtered = new weka.clusterers.FilteredClusterer();
        filtered.setFilter(dropFirstAttribute);
        filtered.setClusterer(hierarchical);
        filtered.buildClusterer(data);

        return ClusterUtils.bulidClusters(hierarchical, data, inDir);
    } catch (Exception ex) {
        Logger.getLogger(Hierarchical.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:nl.uva.sne.classifiers.Kmeans.java

/**
 * Configures a Weka {@code SimpleKMeans} for the data loaded from {@code inDir} and hands it
 * to {@code ClusterUtils.bulidClusters}.
 *
 * <p>The distance is Manhattan when the {@code distanceFunction} field is "Manhattan" and
 * Euclidean otherwise. The clusterer gets a time-seeded random seed in the range
 * [1000000, {@code Integer.MAX_VALUE}], a maximum of 1000000000 iterations, preserved
 * instance order and {@code numOfClusters} clusters.
 *
 * <p>The whole body runs inside a {@code try}; any exception is logged at {@code SEVERE}
 * level and {@code null} is returned instead of propagating it.
 *
 * @param inDir the directory passed to {@code ClusterUtils.terms2Instances} and
 *        {@code ClusterUtils.bulidClusters}
 * @return the result of {@code ClusterUtils.bulidClusters}, or {@code null} on any failure
 */
@Override
public Map<String, String> cluster(String inDir) throws IOException, ParseException {
    try {
        final Instances data = ClusterUtils.terms2Instances(inDir, false);

        final DistanceFunction metric;
        switch (distanceFunction) {
        case "Manhattan":
            metric = new ManhattanDistance(data);
            break;
        case "Euclidean":
        default:
            // Unrecognised names fall back to the Euclidean distance.
            metric = new EuclideanDistance(data);
            break;
        }

        final SimpleKMeans kMeans = new SimpleKMeans();

        // Draw a seed uniformly from [minSeed, Integer.MAX_VALUE].
        final int minSeed = 1000000;
        final Random rng = new Random(System.currentTimeMillis());
        final int seed = rng.nextInt((Integer.MAX_VALUE - minSeed) + 1) + minSeed;
        kMeans.setSeed(seed);
        kMeans.setMaxIterations(1000000000);

        Logger.getLogger(Kmeans.class.getName()).log(Level.INFO, "Start clusteing");

        kMeans.setPreserveInstancesOrder(true);
        kMeans.setNumClusters(numOfClusters);
        kMeans.setDistanceFunction(metric);

        return ClusterUtils.bulidClusters(kMeans, data, inDir);
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}