Usage examples for the weka.core.EuclideanDistance distance method
public double distance(Instance first, Instance second)
From source file:experimentshell.KnnClassifier.java
/** * * @param pInst/*from ww w . j a v a2 s . co m*/ * @return * @throws Exception */ @Override public double classifyInstance(Instance pInst) throws Exception { mK = 3; boolean same = true; EuclideanDistance eDist = new EuclideanDistance(); eDist.setInstances(mInstances); int numInst = mInstances.numInstances(); Instance[] KNN; KNN = new Instance[mK]; //array of three double[] distKNN; distKNN = new double[mK]; //System.out.println(eDist.getInstances()); for (int i = 0; i < numInst; i++) { Instance tempInst = mInstances.instance(i); double dist = eDist.distance(tempInst, pInst); //loop through 3 array so soon as all instancs loops // finish the 3 smallest will I have. for (int j = 0; j < mK; j++) { if (distKNN[j] == 0.0) { distKNN[j] = dist; KNN[j] = tempInst; break; } else if (distKNN[j] < dist) { distKNN[j] = dist; KNN[j] = tempInst; } } } for (int temp = 0; temp < mK - 1; temp++) { if (KNN[temp] != KNN[temp + 1]) same = false; } if (same) { if (KNN[0].attribute(4).name() == "Iris-setosa") mClass = 0; else if (KNN[0].attribute(4).name() == "Iris-versicolor") mClass = 1; else mClass = 2; } mClass = KNN[0].attribute(4).type(); System.out.println(mClass); return mClass; }
From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java
License:Open Source License
/**
 * Collects, for every cluster of a stored k-means model, up to ten instances of the
 * given ARFF data that lie closest to the centroid of the cluster they are assigned to.
 *
 * <p>The instance at row {@code i} is attributed to the cluster found at position
 * {@code i} of the model's assignment array. Once a cluster already holds ten entries,
 * a new instance replaces the current maximum entry (per {@code Collections.max},
 * presumably the farthest one - confirm against {@code DistanceUser.compareTo}) only
 * when its distance is strictly smaller. Finally every row index is translated to a
 * user id through {@code arffToIdUser}.
 *
 * @param arffData     location of the ARFF data to load
 * @param inModel      location of the serialized k-means model
 * @param arffToIdUser mapping from ARFF row index to user identifier
 * @return cluster index mapped to the retained {@code DistanceUser} entries
 * @throws Exception propagated from model/data loading or from Weka
 */
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel,
        Map<Integer, Integer> arffToIdUser) throws Exception {
    SimpleKMeans model = WekaUtils.loadModel(inModel);
    EuclideanDistance metric = (EuclideanDistance) model.getDistanceFunction();
    Instances rows = new Instances(WekaUtils.loadARFF(arffData));
    int[] assignments = model.getAssignments();
    Instances centroids = model.getClusterCentroids();

    // One (initially empty) bucket per centroid.
    Map<Integer, List<DistanceUser>> closestByCluster = new HashMap<Integer, List<DistanceUser>>();
    int centroidCount = centroids.numInstances();
    for (int c = 0; c < centroidCount; c++) {
        closestByCluster.put(c, new ArrayList<DistanceUser>());
    }

    int rowCount = rows.numInstances();
    for (int row = 0; row < rowCount; row++) {
        int cluster = assignments[row];
        double distance = metric.distance(centroids.instance(cluster), rows.instance(row));
        List<DistanceUser> bucket = closestByCluster.get(cluster);

        if (bucket.size() >= 10) {
            // Bucket is full: swap out its maximum entry if this row is strictly closer.
            DistanceUser worst = Collections.max(bucket);
            if (worst.getDistance() > distance) {
                bucket.set(bucket.indexOf(worst), new DistanceUser(row, distance));
            }
            continue;
        }
        bucket.add(new DistanceUser(row, distance));
    }

    // Replace row indices by the corresponding user identifiers.
    for (List<DistanceUser> bucket : closestByCluster.values()) {
        for (DistanceUser entry : bucket) {
            entry.setIdentifier(arffToIdUser.get(entry.getIdentifier()));
        }
    }
    return closestByCluster;
}
From source file:org.montp2.m1decol.ter.servlets.AnalyserMessageServlet.java
License:Open Source License
/**
 * Handles a submitted message: lemmatizes it, encodes it as a single sparse ARFF
 * instance against the attribute header of {@code ARFF_BASE}, finds the closest k-means
 * centroid, reads the user ids listed for that cluster in {@code NEIGHBOR}, and stores
 * the matching users in the session under {@code "LIST_USERS"} before redirecting to
 * the results page.
 *
 * <p>Any failure during processing is printed and otherwise ignored; the temporary
 * files are always removed and the redirect is always sent.
 *
 * @param request  the servlet request carrying the {@code message} parameter
 * @param response the servlet response, redirected to the results page
 * @throws ServletException declared for servlet compatibility
 * @throws IOException      if the redirect cannot be sent
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    String message = request.getParameter("message");
    HttpSession session = request.getSession();

    try {
        // Write the message to a file and lemmatize it.
        OutputStreamUtils.writeSimple(message.toLowerCase(), ROOT_PATH + "message.txt");
        UniGramsPreProcessing uni = new UniGramsPreProcessing();
        uni.computeLemmatization(ROOT_PATH, STOP_WORD);

        // Create the ARFF file.
        List<String> arffData = InputStreamUtils.readByLine(ARFF_BASE);
        List<String> mgsLem = new ArrayList<String>();
        for (String line : InputStreamUtils.readByLine(ROOT_PATH + "message_lemma.txt")) {
            mgsLem.addAll(Arrays.asList(line.split("\\s")));
        }

        StringBuilder arffMessage = new StringBuilder();
        arffMessage.append("@relation 'Message_Utilisateur'\n\n");
        StringBuilder vectorMessage = new StringBuilder();
        vectorMessage.append("{");

        // Header lines are copied from index 2 onwards; the sparse attribute index of a
        // line is its position relative to that start (i - 2).
        for (int i = 2; i < arffData.size(); i++) {
            String line = arffData.get(i);
            if (line.equalsIgnoreCase("@data")) {
                arffMessage.append(line + "\n");
                break;
            }
            if (!line.equals("")) {
                String values[] = line.split("\\s");
                // Guard against header lines without a second token.
                if (values.length > 1 && mgsLem.contains(values[1])) {
                    vectorMessage.append(i - 2 + " 1,");
                }
            }
            arffMessage.append(line + "\n");
        }

        // Drop the trailing comma only when at least one attribute was written, so that
        // a message without any known term yields the valid empty sparse instance "{}".
        String vector = vectorMessage.toString();
        if (vector.endsWith(",")) {
            vector = vector.substring(0, vector.length() - 1);
        }
        arffMessage.append(vector + "}\n");
        OutputStreamUtils.writeSimple(arffMessage.toString(), ROOT_PATH + "message_arff.arff");

        // Find the cluster whose centroid is closest to the message.
        SimpleKMeans kmeans = WekaUtils.loadModel(IN_MODEL);
        EuclideanDistance eclidean = (EuclideanDistance) kmeans.getDistanceFunction();
        Instances data = new Instances(WekaUtils.loadARFF(ROOT_PATH + "message_arff.arff"));
        Instances clusterCentroid = kmeans.getClusterCentroids();

        double dist = Double.MAX_VALUE;
        int cluster_current = -99;
        for (int i = 0; i < clusterCentroid.numInstances(); i++) {
            System.out.println("cluster:" + i);
            double newDist = eclidean.distance(clusterCentroid.instance(i), data.instance(0));
            if (newDist < dist) {
                cluster_current = i;
                dist = newDist;
            }
        }

        // Find the profiles listed for that cluster.
        System.out.println("cluster_current:" + cluster_current);
        boolean findUsers = false;
        List<Integer> idUsers = new ArrayList<Integer>();
        for (String line : InputStreamUtils.readByLine(NEIGHBOR)) {
            if (!"".equals(line)) {
                String values[] = line.split(":");
                if (!findUsers && values[0].equalsIgnoreCase("Cluster")) {
                    if (cluster_current == Integer.parseInt(values[1])) {
                        findUsers = true;
                    }
                } else {
                    if (findUsers && values[0].equalsIgnoreCase("id_user")) {
                        idUsers.add(Integer.parseInt(values[1].trim()));
                    }
                    // The next "Cluster" header ends the section of the selected cluster.
                    if (findUsers && values[0].equalsIgnoreCase("Cluster")) {
                        break;
                    }
                }
            }
        }

        AbstractBusiness business = new ForumBusinness();
        session.setAttribute("LIST_USERS", business.findUsersByIDs(idUsers));
    } catch (Exception e) {
        // Best effort: the user is redirected to the results page even on failure.
        e.printStackTrace();
    } finally {
        FileUtils.removeFile(ROOT_PATH + "message.txt");
        FileUtils.removeFile(ROOT_PATH + "message_lemma.txt");
        FileUtils.removeFile(ROOT_PATH + "message_arff.arff");
    }
    response.sendRedirect("/CategorizeUserForum/results.jsp");
}
From source file:testtubesclassifier.MyAgnes.java
@Override public void buildClusterer(Instances instances) throws Exception { //initalization finalCluster = instances;/*from w ww . j a v a 2s .c om*/ getCapabilities().testWithFail(instances); //replace missing value finalCluster.deleteWithMissingClass(); //adding the cluster value while not convergen using means of all cluster FastVector values = new FastVector(); for (int j = 0; j < numberCluster; j++) { values.addElement(String.valueOf(j)); } finalCluster.insertAttributeAt(new Attribute("Cluster", values), finalCluster.numAttributes()); List<Item> edges = new ArrayList<>(); EuclideanDistance euclidDistance = new EuclideanDistance(); euclidDistance.setInstances(finalCluster); for (int i = 0; i < finalCluster.numInstances(); i++) { for (int j = i + 1; j < finalCluster.numInstances(); j++) { double d = euclidDistance.distance(finalCluster.instance(i), finalCluster.instance(j)); edges.add(new Item(d, i, j)); } } Collections.sort(edges); Node[] nodes = new Node[finalCluster.numInstances()]; ori = new Node[finalCluster.numInstances()]; for (int i = 0; i < finalCluster.numInstances(); i++) { nodes[i] = new Node(true, finalCluster.instance(i).toString()); ori[i] = nodes[i]; } dsu = new DisjoinSetUnion(finalCluster.numInstances()); int clusterNow = finalCluster.numInstances(); for (Item item : edges) { //System.out.println(item.dist + " " + item.index_i + " " + item.index_j); if (dsu.find(item.index_i) != dsu.find(item.index_j)) { Node a = nodes[dsu.find(item.index_i)]; Node b = nodes[dsu.find(item.index_j)]; dsu.merge(item.index_i, item.index_j); Node dad = new Node(false, ""); dad.addChild(a); dad.addChild(b); nodes[dsu.find(item.index_i)] = dad; //System.out.println("merge " + item.index_i + " " + item.index_j + " " + dsu.getSize(item.index_i)); clusterNow--; } if (clusterNow <= numberCluster) break; } boolean[] done = new boolean[finalCluster.numInstances()]; this.root = new Node(false, ""); for (int i = 0; i < finalCluster.numInstances(); i++) { int root = 
dsu.find(i); if (!done[root]) { done[root] = true; this.root.addChild(nodes[root]); } } }
From source file:testtubesclassifier.MyKmeans.java
/**
 * Returns the distance between two instances as computed by a fresh
 * {@code EuclideanDistance} initialised with {@code finalCluster}.
 *
 * @param inst1 the first instance
 * @param inst2 the second instance
 * @return the distance reported by {@code EuclideanDistance.distance}
 * @throws Exception declared by the method signature
 */
public double countDistance(Instance inst1, Instance inst2) throws Exception {
    EuclideanDistance metric = new EuclideanDistance();
    metric.setInstances(finalCluster);
    double separation = metric.distance(inst1, inst2);
    return separation;
}