Example usage for weka.core EuclideanDistance distance

List of usage examples for weka.core EuclideanDistance distance

Introduction

On this page you can find example usages of the weka.core EuclideanDistance distance method.

Prototype

public double distance(Instance first, Instance second) 

Source Link

Document

Calculates the distance between two instances.

Usage

From source file:experimentshell.KnnClassifier.java

/**
 *
 * @param pInst/*from  ww w  .  j a  v a2  s .  co m*/
 * @return
 * @throws Exception
 */
@Override
public double classifyInstance(Instance pInst) throws Exception {
    mK = 3;
    boolean same = true;
    EuclideanDistance eDist = new EuclideanDistance();
    eDist.setInstances(mInstances);
    int numInst = mInstances.numInstances();
    Instance[] KNN;
    KNN = new Instance[mK]; //array of three
    double[] distKNN;
    distKNN = new double[mK];

    //System.out.println(eDist.getInstances());
    for (int i = 0; i < numInst; i++) {
        Instance tempInst = mInstances.instance(i);

        double dist = eDist.distance(tempInst, pInst);

        //loop through 3 array so soon as all instancs loops
        // finish the 3 smallest will I have.

        for (int j = 0; j < mK; j++) {
            if (distKNN[j] == 0.0) {
                distKNN[j] = dist;
                KNN[j] = tempInst;
                break;
            } else if (distKNN[j] < dist) {
                distKNN[j] = dist;
                KNN[j] = tempInst;
            }
        }
    }

    for (int temp = 0; temp < mK - 1; temp++) {

        if (KNN[temp] != KNN[temp + 1])
            same = false;
    }
    if (same) {
        if (KNN[0].attribute(4).name() == "Iris-setosa")
            mClass = 0;
        else if (KNN[0].attribute(4).name() == "Iris-versicolor")
            mClass = 1;
        else
            mClass = 2;
    }

    mClass = KNN[0].attribute(4).type();
    System.out.println(mClass);
    return mClass;
}

From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java

License:Open Source License

/**
 * Collects, for every cluster of a stored k-means model, up to ten data
 * instances with the smallest distance to the centroid of the cluster they
 * are assigned to.
 *
 * @param arffData     path of the ARFF file handed to {@code WekaUtils.loadARFF}
 * @param inModel      path of the model handed to {@code WekaUtils.loadModel}
 * @param arffToIdUser lookup applied to every collected identifier (the ARFF
 *                     row index) before the result is returned
 * @return a map from cluster index to the collected {@code DistanceUser}
 *         entries, whose identifiers have been replaced through
 *         {@code arffToIdUser}
 * @throws Exception if loading the model or the data fails
 */
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel,
        Map<Integer, Integer> arffToIdUser) throws Exception {

    SimpleKMeans model = WekaUtils.loadModel(inModel);
    EuclideanDistance metric = (EuclideanDistance) model.getDistanceFunction();
    Instances dataset = new Instances(WekaUtils.loadARFF(arffData));
    int[] assignments = model.getAssignments();
    Instances centroids = model.getClusterCentroids();

    // One (initially empty) bucket per centroid, so that clusters without
    // any assigned row still show up in the result.
    Map<Integer, List<DistanceUser>> closestByCluster = new HashMap<Integer, List<DistanceUser>>();
    for (int c = 0; c < centroids.numInstances(); c++) {
        closestByCluster.put(c, new ArrayList<DistanceUser>());
    }

    for (int row = 0; row < dataset.numInstances(); row++) {
        int clusterId = assignments[row];
        double d = metric.distance(centroids.instance(clusterId), dataset.instance(row));
        List<DistanceUser> bucket = closestByCluster.get(clusterId);

        if (bucket.size() >= 10) {
            // Bucket is full: replace its farthest entry, but only when the
            // current row is strictly closer to the centroid.
            DistanceUser farthest = Collections.max(bucket);
            if (farthest.getDistance() > d) {
                bucket.set(bucket.indexOf(farthest), new DistanceUser(row, d));
            }
            continue;
        }
        bucket.add(new DistanceUser(row, d));
    }

    // Translate the stored ARFF row indices through the supplied lookup.
    for (List<DistanceUser> bucket : closestByCluster.values()) {
        for (DistanceUser entry : bucket) {
            entry.setIdentifier(arffToIdUser.get(entry.getIdentifier()));
        }
    }

    return closestByCluster;
}

From source file:org.montp2.m1decol.ter.servlets.AnalyserMessageServlet.java

License:Open Source License

/**
 * Handles a submitted message: writes it to disk, lemmatizes it, encodes it
 * as a one-row sparse ARFF file against the attributes of {@code ARFF_BASE},
 * finds the closest k-means centroid, reads the user ids listed for that
 * cluster in the {@code NEIGHBOR} file and stores the matching users in the
 * session under {@code "LIST_USERS"}. The client is then redirected to the
 * results page.
 *
 * <p>Any failure during processing is printed and swallowed, so the redirect
 * always happens; the temporary files are removed in every case.
 *
 * <p>NOTE(review): the temporary files use fixed names under
 * {@code ROOT_PATH}, so concurrent requests would presumably overwrite each
 * other's files - confirm whether this servlet can be hit concurrently.
 *
 * @param request  the HTTP request; its {@code "message"} parameter is the text to analyse
 * @param response the HTTP response, used for the content type and the redirect
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if the redirect fails
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    String message = request.getParameter("message");
    HttpSession session = request.getSession();
    try {
        // Write the message to a file and lemmatize it.
        OutputStreamUtils.writeSimple(message.toLowerCase(), ROOT_PATH + "message.txt");
        UniGramsPreProcessing uni = new UniGramsPreProcessing();
        uni.computeLemmatization(ROOT_PATH, STOP_WORD);

        // Build the ARFF file for the message.
        List<String> arffData = InputStreamUtils.readByLine(ARFF_BASE);
        List<String> mgsLem = new ArrayList<String>();

        for (String line : InputStreamUtils.readByLine(ROOT_PATH + "message_lemma.txt")) {
            mgsLem.addAll(Arrays.asList(line.split("\\s")));
        }

        StringBuilder arffMessage = new StringBuilder();
        arffMessage.append("@relation 'Message_Utilisateur'\n\n");
        StringBuilder vectorMessage = new StringBuilder();
        vectorMessage.append("{");
        // The first two lines of the base file are skipped and replaced by
        // the relation header above; the remaining header lines are copied
        // up to and including "@data".
        for (int i = 2; i < arffData.size(); i++) {
            String line = arffData.get(i);
            if (line.equalsIgnoreCase("@data")) {
                arffMessage.append(line).append("\n");
                break;
            }

            if (!line.equals("")) {
                String[] values = line.split("\\s");
                // The second token is compared with the message lemmas; a
                // line with a single token has nothing to compare.
                if (values.length > 1 && mgsLem.contains(values[1])) {
                    vectorMessage.append(i - 2).append(" 1,");
                }
            }

            arffMessage.append(line).append("\n");
        }

        // Drop the trailing comma only when at least one entry was written;
        // otherwise keep the opening brace so an empty row is emitted as "{}".
        String vector = vectorMessage.toString();
        if (vector.endsWith(",")) {
            vector = vector.substring(0, vector.length() - 1);
        }
        arffMessage.append(vector).append("}\n");

        OutputStreamUtils.writeSimple(arffMessage.toString(), ROOT_PATH + "message_arff.arff");

        // Find the cluster whose centroid is closest to the message.
        SimpleKMeans kmeans = WekaUtils.loadModel(IN_MODEL);

        EuclideanDistance eclidean = (EuclideanDistance) kmeans.getDistanceFunction();

        Instances data = new Instances(WekaUtils.loadARFF(ROOT_PATH + "message_arff.arff"));

        Instances clusterCentroid = kmeans.getClusterCentroids();

        double dist = Double.MAX_VALUE;
        // Stays at -99 when the model has no centroid; no cluster then matches below.
        int cluster_current = -99;

        for (int i = 0; i < clusterCentroid.numInstances(); i++) {
            System.out.println("cluster:" + i);
            double newDist = eclidean.distance(clusterCentroid.instance(i), data.instance(0));
            if (newDist < dist) {
                cluster_current = i;
                dist = newDist;
            }
        }
        System.out.println("cluster_current:" + cluster_current);

        // Collect the user ids listed under the selected cluster: scan for
        // its "Cluster:<n>" line, gather the following "id_user:<id>" lines
        // and stop at the next "Cluster" line.
        boolean findUsers = false;
        List<Integer> idUsers = new ArrayList<Integer>();
        for (String line : InputStreamUtils.readByLine(NEIGHBOR)) {
            if ("".equals(line)) {
                continue;
            }
            String[] values = line.split(":");
            boolean isClusterLine = values[0].equalsIgnoreCase("Cluster");

            if (!findUsers) {
                // Trimmed like the id_user values so "Cluster: 3" also parses.
                if (isClusterLine && cluster_current == Integer.parseInt(values[1].trim())) {
                    findUsers = true;
                }
                continue;
            }

            if (isClusterLine) {
                break;
            }
            if (values[0].equalsIgnoreCase("id_user")) {
                idUsers.add(Integer.parseInt(values[1].trim()));
            }
        }

        AbstractBusiness business = new ForumBusinness();
        session.setAttribute("LIST_USERS", business.findUsersByIDs(idUsers));

    } catch (Exception e) {
        // Best effort: the failure is reported and the user is still redirected.
        e.printStackTrace();
    } finally {
        FileUtils.removeFile(ROOT_PATH + "message.txt");
        FileUtils.removeFile(ROOT_PATH + "message_lemma.txt");
        FileUtils.removeFile(ROOT_PATH + "message_arff.arff");
    }
    response.sendRedirect("/CategorizeUserForum/results.jsp");
}

From source file:testtubesclassifier.MyAgnes.java

/**
 * Builds an agglomerative clustering of the given instances: all pairwise
 * Euclidean distances are sorted in ascending order and the two groups joined
 * by the shortest remaining edge are merged, tracked with a disjoint-set
 * structure, until at most {@code numberCluster} groups remain. The resulting
 * sub-trees become the children of {@code this.root}.
 *
 * <p>No merge is performed when the number of instances is already less than
 * or equal to {@code numberCluster}.
 *
 * <p>NOTE(review): {@code finalCluster} aliases the {@code instances}
 * argument, so removing instances with a missing class and inserting the
 * "Cluster" attribute also modify the caller's data set - confirm whether
 * callers rely on that before switching to a copy.
 *
 * @param instances the data to cluster
 * @throws Exception if the capabilities test or the preparation of the data fails
 */
@Override
public void buildClusterer(Instances instances) throws Exception {
    // Initialization.
    finalCluster = instances;

    getCapabilities().testWithFail(instances);

    // Drop instances whose class value is missing.
    finalCluster.deleteWithMissingClass();

    // Append a nominal "Cluster" attribute with one label per requested cluster.
    FastVector values = new FastVector();
    for (int j = 0; j < numberCluster; j++) {
        values.addElement(String.valueOf(j));
    }
    finalCluster.insertAttributeAt(new Attribute("Cluster", values), finalCluster.numAttributes());

    int numInst = finalCluster.numInstances();

    // Every pair of instances becomes an edge weighted by its distance.
    List<Item> edges = new ArrayList<>();
    EuclideanDistance euclidDistance = new EuclideanDistance();
    euclidDistance.setInstances(finalCluster);
    for (int i = 0; i < numInst; i++) {
        for (int j = i + 1; j < numInst; j++) {
            double d = euclidDistance.distance(finalCluster.instance(i), finalCluster.instance(j));
            edges.add(new Item(d, i, j));
        }
    }
    Collections.sort(edges);

    // One leaf node per instance; ori keeps the leaves, while nodes[r] holds
    // the current sub-tree of the set whose representative is r.
    Node[] nodes = new Node[numInst];
    ori = new Node[numInst];
    for (int i = 0; i < numInst; i++) {
        nodes[i] = new Node(true, finalCluster.instance(i).toString());
        ori[i] = nodes[i];
    }

    dsu = new DisjoinSetUnion(numInst);
    int clusterNow = numInst;
    for (Item item : edges) {
        // Checked before merging so that nothing is merged when the target
        // number of clusters has already been reached.
        if (clusterNow <= numberCluster) {
            break;
        }
        int repI = dsu.find(item.index_i);
        int repJ = dsu.find(item.index_j);
        if (repI != repJ) {
            Node a = nodes[repI];
            Node b = nodes[repJ];
            dsu.merge(item.index_i, item.index_j);
            Node dad = new Node(false, "");
            dad.addChild(a);
            dad.addChild(b);
            // Store the merged sub-tree under the representative of the new set.
            nodes[dsu.find(item.index_i)] = dad;
            clusterNow--;
        }
    }

    // Attach each remaining set's sub-tree exactly once to a fresh root.
    boolean[] done = new boolean[numInst];
    this.root = new Node(false, "");
    for (int i = 0; i < numInst; i++) {
        int rep = dsu.find(i);
        if (!done[rep]) {
            done[rep] = true;
            this.root.addChild(nodes[rep]);
        }
    }
}

From source file:testtubesclassifier.MyKmeans.java

/**
 * Computes the Euclidean distance between two instances, using a distance
 * function initialised with {@code finalCluster}.
 *
 * <p>A new distance function is created and initialised on every call.
 *
 * @param inst1 the first instance
 * @param inst2 the second instance
 * @return the distance between {@code inst1} and {@code inst2}
 * @throws Exception declared for callers; kept from the original signature
 */
public double countDistance(Instance inst1, Instance inst2) throws Exception {
    EuclideanDistance distFunction = new EuclideanDistance();
    distFunction.setInstances(finalCluster);
    return distFunction.distance(inst1, inst2);
}