List of usage examples for the weka.core.EuclideanDistance constructor EuclideanDistance()
public EuclideanDistance()
From source file:ClusteringClass.java
public static void main(String[] args) throws Exception { String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"; try {// ww w .j av a 2 s .c o m FileWriter fw = new FileWriter(filename); Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance(); Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani", "dani"); String query = "SELECT * FROM SONG_RATING2"; Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(query); for (int i = 1; i < 23; i++) { if (i != 2) { ResultSetMetaData rsmd = rs.getMetaData(); String name = rsmd.getColumnName(i); fw.append(name); if (i != 22) { fw.append(','); } else { fw.append('\n'); } } } String query1 = "SELECT * FROM SONG_DATA"; Statement stmt1 = conn.createStatement(); ResultSet rs1 = stmt1.executeQuery(query1); String[] titles = new String[150]; for (int ii = 0; ii < 150; ii++) { rs1.next(); titles[ii] = rs1.getString("TITLE"); } while (rs.next()) { for (int i = 1; i < 23; i++) { if (i == 22) fw.append('\n'); else if (i != 2) { fw.append(','); } } } fw.flush(); fw.close(); conn.close(); System.out.println("CSV File is created successfully."); /* Clustering part */ DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"); Instances train = source.getDataSet(); /* Applichiamo il filtro Remove fornito da Weka per non considerare un attributo nell'algoritmo di Clustering. */ Remove filter = new Remove(); filter.setAttributeIndices("1"); filter.setInputFormat(train); Instances train2 = Filter.useFilter(train, filter); System.out.println("Nominal attributes removed from computation."); /* Applichiamo il filtro Normalize fornito da Weka per normalizzare il nostro dataset. 
*/ Normalize norm = new Normalize(); norm.setInputFormat(train2); Instances train3 = Filter.useFilter(train2, norm); System.out.println("Dataset normalized."); /* First Clustering Algorithm */ EuclideanDistance df = new EuclideanDistance(); SimpleKMeans clus1 = new SimpleKMeans(); int k = 10; clus1.setNumClusters(k); clus1.setDistanceFunction(df); clus1.setPreserveInstancesOrder(true); clus1.buildClusterer(train3); /* First Evaluation */ ClusterEvaluation eval1 = new ClusterEvaluation(); eval1.setClusterer(clus1); eval1.evaluateClusterer(train3); System.out.println(eval1.clusterResultsToString()); int[] assignments = clus1.getAssignments(); String[][] dati = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati[kk][0] = String.valueOf(kk); dati[kk][1] = train2.instance(kk).toString(); dati[kk][2] = String.valueOf(assignments[kk]); dati[kk][3] = titles[kk]; } for (int w = 0; w < 10; w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (dati[i][2].equals(String.valueOf(w))) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati[i][j] + "-> \t"); } else { System.out.println(dati[i][j]); } } } } } /*first graph PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1); //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date()); String name = ""; String cname = clus1.getClass().getName(); if (cname.startsWith("weka.clusterers.")) name += cname.substring("weka.clusterers.".length()); else name += cname; VisualizePanel vp = new VisualizePanel(); vp.setName(name + " (" + train.relationName() + ")"); predData.setPlotName(name + " (" + train.relationName() + ")"); vp.addPlot(predData); String plotName = vp.getName(); final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName); jf.setSize(500,400); jf.getContentPane().setLayout(new BorderLayout()); jf.getContentPane().add(vp, BorderLayout.CENTER); jf.dispose(); jf.addWindowListener(new java.awt.event.WindowAdapter() { public void 
windowClosing(java.awt.event.WindowEvent e) { jf.dispose(); } }); jf.setVisible(true); end first graph */ /* Second Clustering Algorithm */ System.out.println(); DBSCAN clus3 = new DBSCAN(); clus3.setEpsilon(0.7); clus3.setMinPoints(2); clus3.buildClusterer(train3); /* Second Evaluation */ ClusterEvaluation eval3 = new ClusterEvaluation(); eval3.setClusterer(clus3); eval3.evaluateClusterer(train3); System.out.println(eval3.clusterResultsToString()); double[] assignments3 = eval3.getClusterAssignments(); String[][] dati3 = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati3[kk][0] = String.valueOf(kk); dati3[kk][1] = train2.instance(kk).toString(); dati3[kk][2] = String.valueOf(assignments3[kk]); dati3[kk][3] = titles[kk]; } for (int w = 0; w < eval3.getNumClusters(); w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == w) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == -1.0) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:MultiLabelKNN.java
License:Open Source License
/**
 * Creates a learner that uses 10 neighbors and a {@link EuclideanDistance} as its
 * distance function.
 */
public MultiLabelKNN() {
    this.dfunc = new EuclideanDistance();
    this.numOfNeighbors = 10;
}
From source file:MultiLabelKNN.java
License:Open Source License
/**
 * Creates a learner with the given neighbor count and a {@link EuclideanDistance} as its
 * distance function.
 *
 * @param numOfNeighbors the number of neighbors
 */
public MultiLabelKNN(int numOfNeighbors) {
    this.dfunc = new EuclideanDistance();
    this.numOfNeighbors = numOfNeighbors;
}
From source file:HierarchicalClusterer.java
License:Open Source License
/**
 * Parses a given list of options. <p/>
 *
 * <!-- options-start -->
 * Valid options are: <p/>
 * <ul>
 *   <li>{@code -P}: sets the print-Newick flag.</li>
 *   <li>{@code -N <num>}: number of clusters (2 when the option is absent).</li>
 *   <li>{@code -D}: enables debug mode.</li>
 *   <li>{@code -B}: sets the distance-is-branch-length flag.</li>
 *   <li>{@code -L <type>}: link type, one of {@code SINGLE}, {@code COMPLETE},
 *       {@code AVERAGE}, {@code MEAN}, {@code CENTROID}, {@code WARD},
 *       {@code ADJCOMLPETE} or {@code NEIGHBOR_JOINING}; any other value leaves the
 *       current link type untouched.</li>
 *   <li>{@code -A <classname and options>}: distance function to use
 *       ({@link EuclideanDistance} when the option is absent).</li>
 * </ul>
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
    m_bPrintNewick = Utils.getFlag('P', options);

    String optionString = Utils.getOption('N', options);
    if (optionString.length() != 0) {
        // Integer.parseInt replaces the deprecated boxing constructor new Integer(String).
        setNumClusters(Integer.parseInt(optionString));
    } else {
        setNumClusters(2);
    }

    setDebug(Utils.getFlag('D', options));
    setDistanceIsBranchLength(Utils.getFlag('B', options));

    // The link type names are mutually exclusive, so at most one branch can match.
    String sLinkType = Utils.getOption('L', options);
    if ("SINGLE".equals(sLinkType)) {
        setLinkType(new SelectedTag(SINGLE, TAGS_LINK_TYPE));
    } else if ("COMPLETE".equals(sLinkType)) {
        setLinkType(new SelectedTag(COMPLETE, TAGS_LINK_TYPE));
    } else if ("AVERAGE".equals(sLinkType)) {
        setLinkType(new SelectedTag(AVERAGE, TAGS_LINK_TYPE));
    } else if ("MEAN".equals(sLinkType)) {
        setLinkType(new SelectedTag(MEAN, TAGS_LINK_TYPE));
    } else if ("CENTROID".equals(sLinkType)) {
        setLinkType(new SelectedTag(CENTROID, TAGS_LINK_TYPE));
    } else if ("WARD".equals(sLinkType)) {
        setLinkType(new SelectedTag(WARD, TAGS_LINK_TYPE));
    } else if ("ADJCOMLPETE".equals(sLinkType)) {
        setLinkType(new SelectedTag(ADJCOMLPETE, TAGS_LINK_TYPE));
    } else if ("NEIGHBOR_JOINING".equals(sLinkType)) {
        setLinkType(new SelectedTag(NEIGHBOR_JOINING, TAGS_LINK_TYPE));
    }

    String nnSearchClass = Utils.getOption('A', options);
    if (nnSearchClass.length() != 0) {
        String[] nnSearchClassSpec = Utils.splitOptions(nnSearchClass);
        if (nnSearchClassSpec.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        // The first token is the class name; the remaining tokens are its own options.
        String className = nnSearchClassSpec[0];
        nnSearchClassSpec[0] = "";
        setDistanceFunction(
                (DistanceFunction) Utils.forName(DistanceFunction.class, className, nnSearchClassSpec));
    } else {
        setDistanceFunction(new EuclideanDistance());
    }

    Utils.checkForRemainingOptions(options);
}
From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java
License:Open Source License
/**
 * Parses a given list of options.
 *
 * <p>Options read here:
 * <ul>
 *   <li>{@code -V}: sets the display-std-devs flag.</li>
 *   <li>{@code -M}: sets the don't-replace-missing flag.</li>
 *   <li>{@code -N <num>}: number of clusters.</li>
 *   <li>{@code -I <num>}: maximum number of iterations.</li>
 *   <li>{@code -A <classname and options>}: distance function to use
 *       ({@link EuclideanDistance} when the option is absent).</li>
 *   <li>{@code -O}: sets the preserve-order flag.</li>
 * </ul>
 * The remaining options are handed to the superclass.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if the distance function specification is invalid or an option
 *         cannot be processed
 */
public void setOptions(String[] options) throws Exception {
    m_displayStdDevs = Utils.getFlag("V", options);
    m_dontReplaceMissing = Utils.getFlag("M", options);

    String clusterCount = Utils.getOption('N', options);
    if (!clusterCount.isEmpty()) {
        setNumClusters(Integer.parseInt(clusterCount));
    }

    String iterationLimit = Utils.getOption("I", options);
    if (!iterationLimit.isEmpty()) {
        setMaxIterations(Integer.parseInt(iterationLimit));
    }

    String distanceSpec = Utils.getOption('A', options);
    if (distanceSpec.isEmpty()) {
        setDistanceFunction(new EuclideanDistance());
    } else {
        String[] specParts = Utils.splitOptions(distanceSpec);
        if (specParts.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        // First token is the class name; what is left are the options of that class.
        String distanceClassName = specParts[0];
        specParts[0] = "";
        DistanceFunction function =
                (DistanceFunction) Utils.forName(DistanceFunction.class, distanceClassName, specParts);
        setDistanceFunction(function);
    }

    m_PreserveOrder = Utils.getFlag("O", options);

    super.setOptions(options);
}
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/**
 * Parses a given list of options.
 * <p/>
 *
 * <!-- options-start --> Valid options are:
 * <p/>
 * <ul>
 *   <li>{@code -N <num>}: number of clusters (default 2).</li>
 *   <li>{@code -init}: initialization method to use; 0 = random, 1 = k-means++,
 *       2 = canopy, 3 = farthest first (default = 0).</li>
 *   <li>{@code -C}: use canopies to reduce the number of distance calculations.</li>
 *   <li>{@code -max-candidates <num>}: maximum number of candidate canopies to retain in
 *       memory at any one time when using canopy clustering. T2 distance plus data
 *       characteristics determine how many candidate canopies are formed before periodic
 *       and final pruning are performed, which might result in excess memory consumption;
 *       this setting avoids large numbers of candidate canopies consuming memory
 *       (default = 100).</li>
 *   <li>{@code -periodic-pruning <num>}: how often to prune low density canopies when using
 *       canopy clustering (default = every 10,000 training instances).</li>
 *   <li>{@code -min-density}: minimum canopy density, when using canopy clustering, below
 *       which a canopy will be pruned during periodic pruning (default = 2 instances).</li>
 *   <li>{@code -t2}: the T2 distance to use when using canopy clustering; values &lt; 0
 *       indicate that a heuristic based on attribute std. deviation should be used
 *       (default = -1.0).</li>
 *   <li>{@code -t1}: the T1 distance to use when using canopy clustering; a value &lt; 0 is
 *       taken as a positive multiplier for T2 (default = -1.5).</li>
 *   <li>{@code -V}: display std. deviations for centroids.</li>
 *   <li>{@code -M}: don't replace missing values with mean/mode.</li>
 *   <li>{@code -A <classname and options>}: distance function to use
 *       (default: weka.core.EuclideanDistance).</li>
 *   <li>{@code -I <num>}: maximum number of iterations.</li>
 *   <li>{@code -O}: preserve order of instances.</li>
 *   <li>{@code -fast}: enables faster distance calculations, using cut-off values; disables
 *       the calculation/output of squared errors/distances.</li>
 *   <li>{@code -num-slots <num>}: number of execution slots
 *       (default 1 - i.e. no parallelism).</li>
 *   <li>{@code -S <num>}: random number seed (default 10).</li>
 *   <li>{@code -output-debug-info}: if set, clusterer is run in debug mode and may output
 *       additional info to the console.</li>
 *   <li>{@code -do-not-check-capabilities}: if set, clusterer capabilities are not checked
 *       before clusterer is built (use with caution).</li>
 * </ul>
 * The last three options are not read in this method; presumably they are consumed by
 * {@code super.setOptions}.
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
    m_displayStdDevs = Utils.getFlag("V", options);
    m_dontReplaceMissing = Utils.getFlag("M", options);

    String initMethod = Utils.getOption("init", options);
    if (!initMethod.isEmpty()) {
        int methodId = Integer.parseInt(initMethod);
        setInitializationMethod(new SelectedTag(methodId, TAGS_SELECTION));
    }

    // Canopy-related settings.
    m_speedUpDistanceCompWithCanopies = Utils.getFlag('C', options);

    String maxCandidates = Utils.getOption("max-candidates", options);
    if (!maxCandidates.isEmpty()) {
        setCanopyMaxNumCanopiesToHoldInMemory(Integer.parseInt(maxCandidates));
    }

    String pruningRate = Utils.getOption("periodic-pruning", options);
    if (!pruningRate.isEmpty()) {
        setCanopyPeriodicPruningRate(Integer.parseInt(pruningRate));
    }

    String minDensity = Utils.getOption("min-density", options);
    if (!minDensity.isEmpty()) {
        setCanopyMinimumCanopyDensity(Double.parseDouble(minDensity));
    }

    String t2 = Utils.getOption("t2", options);
    if (!t2.isEmpty()) {
        setCanopyT2(Double.parseDouble(t2));
    }

    String t1 = Utils.getOption("t1", options);
    if (!t1.isEmpty()) {
        setCanopyT1(Double.parseDouble(t1));
    }

    // Core k-means settings.
    String clusterCount = Utils.getOption('N', options);
    if (!clusterCount.isEmpty()) {
        setNumClusters(Integer.parseInt(clusterCount));
    }

    String iterationLimit = Utils.getOption("I", options);
    if (!iterationLimit.isEmpty()) {
        setMaxIterations(Integer.parseInt(iterationLimit));
    }

    String distanceSpec = Utils.getOption('A', options);
    if (distanceSpec.isEmpty()) {
        setDistanceFunction(new EuclideanDistance());
    } else {
        String[] specParts = Utils.splitOptions(distanceSpec);
        if (specParts.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        // First token is the class name; what is left are the options of that class.
        String distanceClassName = specParts[0];
        specParts[0] = "";
        DistanceFunction function =
                (DistanceFunction) Utils.forName(DistanceFunction.class, distanceClassName, specParts);
        setDistanceFunction(function);
    }

    m_PreserveOrder = Utils.getFlag("O", options);
    m_FastDistanceCalc = Utils.getFlag("fast", options);

    String slotCount = Utils.getOption("num-slots", options);
    if (!slotCount.isEmpty()) {
        setNumExecutionSlots(Integer.parseInt(slotCount));
    }

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
}
From source file:cn.edu.xmu.dm.d3c.clustering.SimpleKMeans.java
License:Open Source License
/**
 * Parses a given list of options. <p/>
 *
 * <!-- options-start -->
 * Valid options are: <p/>
 * <ul>
 *   <li>{@code -N <num>}: number of clusters (default 2).</li>
 *   <li>{@code -P}: initialize using the k-means++ method.</li>
 *   <li>{@code -V}: display std. deviations for centroids.</li>
 *   <li>{@code -M}: sets the don't-replace-missing flag (missing values with
 *       mean/mode).</li>
 *   <li>{@code -A <classname and options>}: distance function to use
 *       (default: weka.core.EuclideanDistance).</li>
 *   <li>{@code -I <num>}: maximum number of iterations.</li>
 *   <li>{@code -O}: preserve order of instances.</li>
 *   <li>{@code -fast}: enables faster distance calculations, using cut-off values; disables
 *       the calculation/output of squared errors/distances.</li>
 *   <li>{@code -S <num>}: random number seed (default 10); not read in this method,
 *       presumably consumed by {@code super.setOptions}.</li>
 * </ul>
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
    m_displayStdDevs = Utils.getFlag("V", options);
    m_dontReplaceMissing = Utils.getFlag("M", options);
    m_initializeWithKMeansPlusPlus = Utils.getFlag('P', options);

    String clusterCount = Utils.getOption('N', options);
    if (!clusterCount.isEmpty()) {
        setNumClusters(Integer.parseInt(clusterCount));
    }

    String iterationLimit = Utils.getOption("I", options);
    if (!iterationLimit.isEmpty()) {
        setMaxIterations(Integer.parseInt(iterationLimit));
    }

    String distanceSpec = Utils.getOption('A', options);
    if (distanceSpec.isEmpty()) {
        setDistanceFunction(new EuclideanDistance());
    } else {
        String[] specParts = Utils.splitOptions(distanceSpec);
        if (specParts.length == 0) {
            throw new Exception("Invalid DistanceFunction specification string.");
        }
        // First token is the class name; what is left are the options of that class.
        String distanceClassName = specParts[0];
        specParts[0] = "";
        DistanceFunction function =
                (DistanceFunction) Utils.forName(DistanceFunction.class, distanceClassName, specParts);
        setDistanceFunction(function);
    }

    m_PreserveOrder = Utils.getFlag("O", options);
    m_FastDistanceCalc = Utils.getFlag("fast", options);

    super.setOptions(options);
}
From source file:cn.pku.sei.GHRC.SpectralClusterer.java
License:Open Source License
/** * Returns the distance function/*from www.ja v a2s . co m*/ * * @return */ public DistanceFunction getDistanceFunction() { if (distanceFunction == null) { distanceFunction = new EuclideanDistance(); } if (distanceFunction.getInstances() != getData()) distanceFunction.setInstances(getData()); return distanceFunction; }
From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java
License:Open Source License
/** * Parses a given list of options./*from w w w. ja v a 2s . c om*/ * <p/> * * <!-- options-start --> Valid options are: * <p/> * * <pre> * -N <num> * number of clusters. * (default 2). * </pre> * * <pre> * -V * Display std. deviations for centroids. * </pre> * * <pre> * -M * Replace missing values with mean/mode. * </pre> * * <pre> * -S <num> * Random number seed. * (default 10) * </pre> * * <pre> * -A <classname and options> * Distance function to be used for instance comparison * (default weka.core.EuclidianDistance) * </pre> * * <pre> * -I <num> * Maximum number of iterations. * </pre> * * <pre> * -O * Preserve order of instances. * </pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { m_displayStdDevs = Utils.getFlag("V", options); m_dontReplaceMissing = Utils.getFlag("M", options); String optionString = Utils.getOption('N', options); if (optionString.length() != 0) { setNumClusters(Integer.parseInt(optionString)); } optionString = Utils.getOption("I", options); if (optionString.length() != 0) { setMaxIterations(Integer.parseInt(optionString)); } String distFunctionClass = Utils.getOption('A', options); if (distFunctionClass.length() != 0) { String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass); if (distFunctionClassSpec.length == 0) { throw new Exception("Invalid DistanceFunction specification string."); } String className = distFunctionClassSpec[0]; distFunctionClassSpec[0] = ""; setDistanceFunction( (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec)); } else { setDistanceFunction(new EuclideanDistance()); } m_PreserveOrder = Utils.getFlag("O", options); super.setOptions(options); }
From source file:de.uni_potsdam.hpi.bpt.promnicat.processEvolution.clustering.ProcessEvolutionClusterer.java
License:Open Source License
/** * cluster the models according to the given configuration * @param models//from w ww . j a v a2 s .c o m * @param configuration * @throws IOException */ public static void doClustering(Map<String, ProcessEvolutionModel> models, ProcessEvolutionClusteringConfiguration configuration) throws IOException { clusterer.setNumericDistanceFunction(new EuclideanDistance()); // add an initial header to the result string StringBuilder clusterResultStringBuilder = new StringBuilder().append("Configuration:").append("["); setClusterAttributes(configuration, clusterResultStringBuilder); // cluster the models ProcessInstances instances = getInstances(models, getNumericAttributes(configuration)); clusterModels(instances, clusterResultStringBuilder); // write the results into a file writeResults(clusterResultStringBuilder); }