Example usage for weka.core EuclideanDistance EuclideanDistance

List of usage examples for weka.core EuclideanDistance EuclideanDistance

Introduction

In this page you can find the example usage for weka.core EuclideanDistance EuclideanDistance.

Prototype

public EuclideanDistance() 

Source Link

Document

Constructs an Euclidean Distance object, Instances must be still set.

Usage

From source file:ClusteringClass.java

public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";

    try {// ww w .j av a 2 s  .c  o m
        FileWriter fw = new FileWriter(filename);
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();
        Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani",
                "dani");

        String query = "SELECT * FROM SONG_RATING2";
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(query);

        for (int i = 1; i < 23; i++) {
            if (i != 2) {
                ResultSetMetaData rsmd = rs.getMetaData();
                String name = rsmd.getColumnName(i);
                fw.append(name);
                if (i != 22) {
                    fw.append(',');
                } else {
                    fw.append('\n');
                }
            }
        }

        String query1 = "SELECT * FROM SONG_DATA";
        Statement stmt1 = conn.createStatement();
        ResultSet rs1 = stmt1.executeQuery(query1);

        String[] titles = new String[150];

        for (int ii = 0; ii < 150; ii++) {
            rs1.next();
            titles[ii] = rs1.getString("TITLE");
        }

        while (rs.next()) {
            for (int i = 1; i < 23; i++) {
                if (i == 22)
                    fw.append('\n');
                else if (i != 2) {
                    fw.append(',');
                }
            }
        }

        fw.flush();
        fw.close();
        conn.close();
        System.out.println("CSV File is created successfully.");

        /*
         Clustering part
         */
        DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv");
        Instances train = source.getDataSet();

        /*
         Applichiamo il filtro Remove fornito da Weka per non considerare un
         attributo nell'algoritmo di Clustering.
         */
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        /*
         Applichiamo il filtro Normalize fornito da Weka per normalizzare il 
         nostro dataset.
         */
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /*
         First Clustering Algorithm
         */
        EuclideanDistance df = new EuclideanDistance();
        SimpleKMeans clus1 = new SimpleKMeans();
        int k = 10;
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(df);
        clus1.setPreserveInstancesOrder(true);
        clus1.buildClusterer(train3);

        /*
         First Evaluation
         */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        int[] assignments = clus1.getAssignments();
        String[][] dati = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati[kk][0] = String.valueOf(kk);
            dati[kk][1] = train2.instance(kk).toString();
            dati[kk][2] = String.valueOf(assignments[kk]);
            dati[kk][3] = titles[kk];
        }

        for (int w = 0; w < 10; w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (dati[i][2].equals(String.valueOf(w))) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati[i][j] + "-> \t");
                        } else {
                            System.out.println(dati[i][j]);
                        }
                    }
                }
            }
        }

        /*first graph  
                
         PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1);
         //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date());
         String name = "";
         String cname = clus1.getClass().getName();
         if (cname.startsWith("weka.clusterers."))
         name += cname.substring("weka.clusterers.".length());
         else
         name += cname;
                
                
         VisualizePanel vp = new VisualizePanel();
         vp.setName(name + " (" + train.relationName() + ")");
         predData.setPlotName(name + " (" + train.relationName() + ")");
         vp.addPlot(predData);
                
         String plotName = vp.getName();
         final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName);
         jf.setSize(500,400);
         jf.getContentPane().setLayout(new BorderLayout());
         jf.getContentPane().add(vp, BorderLayout.CENTER);
         jf.dispose();
         jf.addWindowListener(new java.awt.event.WindowAdapter() {
         public void windowClosing(java.awt.event.WindowEvent e) {
         jf.dispose();
         }
         });
         jf.setVisible(true);
                
         end first graph
         */

        /*
         Second Clustering Algorithm
         */

        System.out.println();

        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /*
         Second Evaluation
         */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        String[][] dati3 = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati3[kk][0] = String.valueOf(kk);
            dati3[kk][1] = train2.instance(kk).toString();
            dati3[kk][2] = String.valueOf(assignments3[kk]);
            dati3[kk][3] = titles[kk];
        }

        for (int w = 0; w < eval3.getNumClusters(); w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (Double.parseDouble(dati3[i][2]) == w) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati3[i][j] + "-> \t");
                        } else {
                            System.out.println(dati3[i][j]);
                        }
                    }
                }
            }
        }
        System.out.println();
        for (int i = 0; i < 150; i++) {
            if (Double.parseDouble(dati3[i][2]) == -1.0) {
                for (int j = 0; j < 4; j++) {
                    if (j != 3) {
                        System.out.print(dati3[i][j] + "-> \t");
                    } else {
                        System.out.println(dati3[i][j]);
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:MultiLabelKNN.java

License:Open Source License

/**
 * The default constructor
 */
public MultiLabelKNN() {
    this.numOfNeighbors = 10;
    dfunc = new EuclideanDistance();
}

From source file:MultiLabelKNN.java

License:Open Source License

/**
 * Initializes the number of neighbors// w w  w  .  ja  v a 2  s . c  o m
 *
 * @param numOfNeighbors the number of neighbors
 */
public MultiLabelKNN(int numOfNeighbors) {
    this.numOfNeighbors = numOfNeighbors;
    dfunc = new EuclideanDistance();
}

From source file:HierarchicalClusterer.java

License:Open Source License

/**
 * Parses a given list of options. <p/>
 *
    <!-- options-start -->//from   w  w  w  . j a  v  a2 s.c  o  m
 * Valid options are: <p/>
 * 
    <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
    m_bPrintNewick = Utils.getFlag('P', options);

    String optionString = Utils.getOption('N', options);
    if (optionString.length() != 0) {
        Integer temp = new Integer(optionString);
        setNumClusters(temp);
    } else {
        setNumClusters(2);
    }

    setDebug(Utils.getFlag('D', options));
    setDistanceIsBranchLength(Utils.getFlag('B', options));

    String sLinkType = Utils.getOption('L', options);

    if (sLinkType.compareTo("SINGLE") == 0) {
        setLinkType(new SelectedTag(SINGLE, TAGS_LINK_TYPE));
    }
    if (sLinkType.compareTo("COMPLETE") == 0) {
        setLinkType(new SelectedTag(COMPLETE, TAGS_LINK_TYPE));
    }
    if (sLinkType.compareTo("AVERAGE") == 0) {
        setLinkType(new SelectedTag(AVERAGE, TAGS_LINK_TYPE));
    }
    if (sLinkType.compareTo("MEAN") == 0) {
        setLinkType(new SelectedTag(MEAN, TAGS_LINK_TYPE));
    }
    if (sLinkType.compareTo("CENTROID") == 0) {
        setLinkType(new SelectedTag(CENTROID, TAGS_LINK_TYPE));
    }
    if (sLinkType.compareTo("WARD") == 0) {
        setLinkType(new SelectedTag(WARD, TAGS_LINK_TYPE));
    }
    if (sLinkType.compareTo("ADJCOMLPETE") == 0) {
        setLinkType(new SelectedTag(ADJCOMLPETE, TAGS_LINK_TYPE));
    }
    if (sLinkType.compareTo("NEIGHBOR_JOINING") == 0) {
        setLinkType(new SelectedTag(NEIGHBOR_JOINING, TAGS_LINK_TYPE));
    }

    String nnSearchClass = Utils.getOption('A', options);
    if (nnSearchClass.length() != 0) {
        String nnSearchClassSpec[] = Utils.splitOptions(nnSearchClass);
        if (nnSearchClassSpec.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        String className = nnSearchClassSpec[0];
        nnSearchClassSpec[0] = "";

        setDistanceFunction(
                (DistanceFunction) Utils.forName(DistanceFunction.class, className, nnSearchClassSpec));
    } else {
        setDistanceFunction(new EuclideanDistance());
    }

    Utils.checkForRemainingOptions(options);
}

From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java

License:Open Source License

public void setOptions(String[] options) throws Exception {

    m_displayStdDevs = Utils.getFlag("V", options);
    m_dontReplaceMissing = Utils.getFlag("M", options);

    String optionString = Utils.getOption('N', options);

    if (optionString.length() != 0) {
        setNumClusters(Integer.parseInt(optionString));
    }/*from  w  w  w.  j a v  a 2s  .  c  o  m*/

    optionString = Utils.getOption("I", options);
    if (optionString.length() != 0) {
        setMaxIterations(Integer.parseInt(optionString));
    }

    String distFunctionClass = Utils.getOption('A', options);
    if (distFunctionClass.length() != 0) {
        String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass);
        if (distFunctionClassSpec.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        String className = distFunctionClassSpec[0];
        distFunctionClassSpec[0] = "";

        setDistanceFunction(
                (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec));
    } else {
        setDistanceFunction(new EuclideanDistance());
    }

    m_PreserveOrder = Utils.getFlag("O", options);

    super.setOptions(options);
}

From source file:clusterer.SimpleKMeansWithSilhouette.java

License:Open Source License

/**
 * Parses a given list of options.//ww  w .j  a  v a2  s. co  m
 * <p/>
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -N &lt;num&gt;
 *  Number of clusters.
 *  (default 2).
 * </pre>
 * 
 * <pre>
 * -init
 *  Initialization method to use.
 *  0 = random, 1 = k-means++, 2 = canopy, 3 = farthest first.
 *  (default = 0)
 * </pre>
 * 
 * <pre>
 * -C
 *  Use canopies to reduce the number of distance calculations.
 * </pre>
 * 
 * <pre>
 * -max-candidates &lt;num&gt;
 *  Maximum number of candidate canopies to retain in memory
 *  at any one time when using canopy clustering.
 *  T2 distance plus, data characteristics,
 *  will determine how many candidate canopies are formed before
 *  periodic and final pruning are performed, which might result
 *  in exceess memory consumption. This setting avoids large numbers
 *  of candidate canopies consuming memory. (default = 100)
 * </pre>
 * 
 * <pre>
 * -periodic-pruning &lt;num&gt;
 *  How often to prune low density canopies when using canopy clustering. 
 *  (default = every 10,000 training instances)
 * </pre>
 * 
 * <pre>
 * -min-density
 *  Minimum canopy density, when using canopy clustering, below which
 *   a canopy will be pruned during periodic pruning. (default = 2 instances)
 * </pre>
 * 
 * <pre>
 * -t2
 *  The T2 distance to use when using canopy clustering. Values &lt; 0 indicate that
 *  a heuristic based on attribute std. deviation should be used to set this.
 *  (default = -1.0)
 * </pre>
 * 
 * <pre>
 * -t1
 *  The T1 distance to use when using canopy clustering. A value &lt; 0 is taken as a
 *  positive multiplier for T2. (default = -1.5)
 * </pre>
 * 
 * <pre>
 * -V
 *  Display std. deviations for centroids.
 * </pre>
 * 
 * <pre>
 * -M
 *  Don't replace missing values with mean/mode.
 * </pre>
 * 
 * <pre>
 * -A &lt;classname and options&gt;
 *  Distance function to use.
 *  (default: weka.core.EuclideanDistance)
 * </pre>
 * 
 * <pre>
 * -I &lt;num&gt;
 *  Maximum number of iterations.
 * </pre>
 * 
 * <pre>
 * -O
 *  Preserve order of instances.
 * </pre>
 * 
 * <pre>
 * -fast
 *  Enables faster distance calculations, using cut-off values.
 *  Disables the calculation/output of squared errors/distances.
 * </pre>
 * 
 * <pre>
 * -num-slots &lt;num&gt;
 *  Number of execution slots.
 *  (default 1 - i.e. no parallelism)
 * </pre>
 * 
 * <pre>
 * -S &lt;num&gt;
 *  Random number seed.
 *  (default 10)
 * </pre>
 * 
 * <pre>
 * -output-debug-info
 *  If set, clusterer is run in debug mode and
 *  may output additional info to the console
 * </pre>
 * 
 * <pre>
 * -do-not-check-capabilities
 *  If set, clusterer capabilities are not checked before clusterer is built
 *  (use with caution).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {

    m_displayStdDevs = Utils.getFlag("V", options);
    m_dontReplaceMissing = Utils.getFlag("M", options);

    String initM = Utils.getOption("init", options);
    if (initM.length() > 0) {
        setInitializationMethod(new SelectedTag(Integer.parseInt(initM), TAGS_SELECTION));
    }

    m_speedUpDistanceCompWithCanopies = Utils.getFlag('C', options);

    String temp = Utils.getOption("max-candidates", options);
    if (temp.length() > 0) {
        setCanopyMaxNumCanopiesToHoldInMemory(Integer.parseInt(temp));
    }

    temp = Utils.getOption("periodic-pruning", options);
    if (temp.length() > 0) {
        setCanopyPeriodicPruningRate(Integer.parseInt(temp));
    }

    temp = Utils.getOption("min-density", options);
    if (temp.length() > 0) {
        setCanopyMinimumCanopyDensity(Double.parseDouble(temp));
    }

    temp = Utils.getOption("t2", options);
    if (temp.length() > 0) {
        setCanopyT2(Double.parseDouble(temp));
    }

    temp = Utils.getOption("t1", options);
    if (temp.length() > 0) {
        setCanopyT1(Double.parseDouble(temp));
    }

    String optionString = Utils.getOption('N', options);

    if (optionString.length() != 0) {
        setNumClusters(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption("I", options);
    if (optionString.length() != 0) {
        setMaxIterations(Integer.parseInt(optionString));
    }

    String distFunctionClass = Utils.getOption('A', options);
    if (distFunctionClass.length() != 0) {
        String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass);
        if (distFunctionClassSpec.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        String className = distFunctionClassSpec[0];
        distFunctionClassSpec[0] = "";

        setDistanceFunction(
                (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec));
    } else {
        setDistanceFunction(new EuclideanDistance());
    }

    m_PreserveOrder = Utils.getFlag("O", options);

    m_FastDistanceCalc = Utils.getFlag("fast", options);

    String slotsS = Utils.getOption("num-slots", options);
    if (slotsS.length() > 0) {
        setNumExecutionSlots(Integer.parseInt(slotsS));
    }

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
}

From source file:cn.edu.xmu.dm.d3c.clustering.SimpleKMeans.java

License:Open Source License

/**
 * Parses a given list of options. <p/>
 * //from   w ww. j  a  va 2s .c  om
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -N &lt;num&gt;
 *  number of clusters.
 *  (default 2).</pre>
 * 
 * <pre> -P
 *  Initialize using the k-means++ method.
 * </pre>
 * 
 * <pre> -V
 *  Display std. deviations for centroids.
 * </pre>
 * 
 * <pre> -M
 *  Replace missing values with mean/mode.
 * </pre>
 * 
 * <pre> -A &lt;classname and options&gt;
 *  Distance function to use.
 *  (default: weka.core.EuclideanDistance)</pre>
 * 
 * <pre> -I &lt;num&gt;
 *  Maximum number of iterations.
 * </pre>
 * 
 * <pre> -O
 *  Preserve order of instances.
 * </pre>
 * 
 * <pre> -fast
 *  Enables faster distance calculations, using cut-off values.
 *  Disables the calculation/output of squared errors/distances.
 * </pre>
 * 
 * <pre> -S &lt;num&gt;
 *  Random number seed.
 *  (default 10)</pre>
 * 
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

    m_displayStdDevs = Utils.getFlag("V", options);
    m_dontReplaceMissing = Utils.getFlag("M", options);
    m_initializeWithKMeansPlusPlus = Utils.getFlag('P', options);

    String optionString = Utils.getOption('N', options);

    if (optionString.length() != 0) {
        setNumClusters(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption("I", options);
    if (optionString.length() != 0) {
        setMaxIterations(Integer.parseInt(optionString));
    }

    String distFunctionClass = Utils.getOption('A', options);
    if (distFunctionClass.length() != 0) {
        String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass);
        if (distFunctionClassSpec.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        String className = distFunctionClassSpec[0];
        distFunctionClassSpec[0] = "";

        setDistanceFunction(
                (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec));
    } else {
        setDistanceFunction(new EuclideanDistance());
    }

    m_PreserveOrder = Utils.getFlag("O", options);

    m_FastDistanceCalc = Utils.getFlag("fast", options);

    super.setOptions(options);
}

From source file:cn.pku.sei.GHRC.SpectralClusterer.java

License:Open Source License

/**
 * Returns the distance function/*from www.ja v  a2s  . co  m*/
 * 
 * @return
 */
public DistanceFunction getDistanceFunction() {
    if (distanceFunction == null) {
        distanceFunction = new EuclideanDistance();
    }
    if (distanceFunction.getInstances() != getData())
        distanceFunction.setInstances(getData());
    return distanceFunction;
}

From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java

License:Open Source License

/**
 * Parses a given list of options./*from w  w w.  ja v  a  2s .  c  om*/
 * <p/>
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -N &lt;num&gt;
 *  number of clusters.
 *  (default 2).
 * </pre>
 * 
 * <pre>
 * -V
 *  Display std. deviations for centroids.
 * </pre>
 * 
 * <pre>
 * -M
 *  Replace missing values with mean/mode.
 * </pre>
 * 
 * <pre>
 * -S &lt;num&gt;
 *  Random number seed.
 *  (default 10)
 * </pre>
 * 
 * <pre>
 * -A &lt;classname and options&gt;
 *  Distance function to be used for instance comparison
 *  (default weka.core.EuclidianDistance)
 * </pre>
 * 
 * <pre>
 * -I &lt;num&gt;
 *  Maximum number of iterations.
 * </pre>
 * 
 * <pre>
 * -O
 *  Preserve order of instances.
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {

    m_displayStdDevs = Utils.getFlag("V", options);
    m_dontReplaceMissing = Utils.getFlag("M", options);

    String optionString = Utils.getOption('N', options);

    if (optionString.length() != 0) {
        setNumClusters(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption("I", options);
    if (optionString.length() != 0) {
        setMaxIterations(Integer.parseInt(optionString));
    }

    String distFunctionClass = Utils.getOption('A', options);
    if (distFunctionClass.length() != 0) {
        String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass);
        if (distFunctionClassSpec.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        String className = distFunctionClassSpec[0];
        distFunctionClassSpec[0] = "";

        setDistanceFunction(
                (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec));
    } else {
        setDistanceFunction(new EuclideanDistance());
    }

    m_PreserveOrder = Utils.getFlag("O", options);

    super.setOptions(options);
}

From source file:de.uni_potsdam.hpi.bpt.promnicat.processEvolution.clustering.ProcessEvolutionClusterer.java

License:Open Source License

/**
 * cluster the models according to the given configuration
 * @param models//from w  ww . j  a v  a2 s  .c o m
 * @param configuration
 * @throws IOException
 */
public static void doClustering(Map<String, ProcessEvolutionModel> models,
        ProcessEvolutionClusteringConfiguration configuration) throws IOException {
    clusterer.setNumericDistanceFunction(new EuclideanDistance());
    // add an initial header to the result string
    StringBuilder clusterResultStringBuilder = new StringBuilder().append("Configuration:").append("[");

    setClusterAttributes(configuration, clusterResultStringBuilder);
    // cluster the models
    ProcessInstances instances = getInstances(models, getNumericAttributes(configuration));
    clusterModels(instances, clusterResultStringBuilder);
    // write the results into a file
    writeResults(clusterResultStringBuilder);
}