Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package dataMining;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;
import weka.core.converters.ArffSaver;

/**
 * k-nearest-neighbours classifier working on Weka data sets.
 *
 * <p>For every test instance the {@code k} training instances with the
 * smallest Manhattan distance are looked up, and the value of the decision
 * attribute that occurs most often among them is written into the test
 * instance.
 *
 * <p>The index of the decision attribute is resolved on the test data and is
 * then also used to read training instances, so both data sets are assumed to
 * share the same attribute layout.
 *
 * @author Mateusz lzak
 */
public class kNN {

    /** The k parameter of the classifier: how many neighbours take part in the vote. */
    private final int k;

    /** Index of the decision attribute; resolved by {@link #reviewData()}. */
    private int numAtt;

    /** Name of the attribute for which the classification is carried out. */
    private final String attributName;

    /** Training data. */
    private Instances treningData;

    /** Test data. */
    private Instances testData;

    /**
     * Creates a classifier over the given data sets.
     *
     * @param newK         the k parameter of the classifier
     * @param trainingData training data
     * @param testData     test data
     * @param attName      name of the attribute that is to be classified
     */
    public kNN(int newK, Instances trainingData, Instances testData, String attName) {
        k = newK;
        this.testData = testData;
        this.treningData = trainingData;
        this.attributName = attName;
    }

    /**
     * Loads the training and test data through an {@link ArffLoader}.
     *
     * <p>NOTE(review): no source is ever configured on the loader before
     * {@code getDataSet()} is called, so this presumably cannot load anything
     * useful; an {@link IOException} is only printed to standard error.
     *
     * @deprecated unused; the data sets are supplied through the constructor.
     */
    @Deprecated
    private void openFiles() {
        try {
            ArffLoader load = new ArffLoader();
            treningData = load.getDataSet();
            testData = load.getDataSet();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the test data set held by this classifier.
     *
     * @return the test data
     */
    public Instances getData() {
        return testData;
    }

    /**
     * Classifies every test instance: finds its nearest neighbours in the
     * training data and stores the decision that occurs most often among them
     * in the decision attribute of the test instance.
     *
     * <p>The decision attribute is the first attribute of the test data whose
     * name equals the configured attribute name; when no attribute matches,
     * index 0 is used. When no decision can be derived for an instance (no
     * neighbours are available, or none of them has a decision value) the
     * instance keeps its current value and is still part of the result.
     *
     * @return the textual form of all test instances, one instance per line
     */
    public String reviewData() {
        numAtt = 0;
        for (int i = 0; i < testData.numAttributes(); i++) {
            if (testData.attribute(i).name().equals(attributName)) {
                numAtt = i;
                break;
            }
        }

        StringBuilder report = new StringBuilder();
        for (int i = 0; i < testData.numInstances(); i++) {
            Instance ins = testData.instance(i);
            double decision = selectValue(findNeighbors(ins));
            // NaN is the "no decision" marker of selectValue; never write it back.
            if (!Double.isNaN(decision)) {
                ins.setValue(numAtt, decision);
            }
            report.append(ins.toString()).append("\n");
        }
        return report.toString();
    }

    /**
     * Picks the decision that occurs most often among the given neighbours.
     *
     * <p>Votes are counted on the internal value of the decision attribute,
     * which is the representation {@code setValue(int, double)} expects, so
     * nominal decisions work as well as numeric ones. Neighbours without a
     * decision value do not vote. On a tie the decision that was encountered
     * first in the list wins.
     *
     * @param list the neighbours
     * @return the winning decision, or {@code Double.NaN} when nobody voted
     */
    private double selectValue(List<Instance> list) {
        // Insertion-ordered so that ties are resolved in list order.
        Map<Double, Integer> votes = new LinkedHashMap<>();
        for (Instance neighbor : list) {
            if (neighbor.isMissing(numAtt)) {
                continue;
            }
            votes.merge(neighbor.value(numAtt), 1, Integer::sum);
        }

        double winner = Double.NaN;
        int bestCount = 0;
        for (Map.Entry<Double, Integer> vote : votes.entrySet()) {
            if (vote.getValue() > bestCount) {
                bestCount = vote.getValue();
                winner = vote.getKey();
            }
        }
        return winner;
    }

    /**
     * Finds the nearest neighbours of an instance in the training data.
     *
     * <p>At most {@code k} neighbours are returned; fewer when the training
     * set is smaller than {@code k}, none when {@code k} is not positive.
     * Training instances at equal distance keep their order in the training
     * set.
     *
     * @param ins the instance whose neighbourhood is looked up
     * @return the neighbours, ordered by increasing distance
     */
    private List<Instance> findNeighbors(Instance ins) {
        int trainingSize = treningData.numInstances();

        // Every distance is computed exactly once.
        double[] distances = new double[trainingSize];
        List<Integer> order = new ArrayList<>(trainingSize);
        for (int i = 0; i < trainingSize; i++) {
            distances[i] = manhattan(ins, treningData.instance(i));
            order.add(i);
        }
        // List.sort is stable, so equal distances stay in training-set order.
        order.sort(Comparator.comparingDouble((Integer index) -> distances[index]));

        int wanted = Math.max(0, Math.min(k, trainingSize));
        List<Instance> neighbors = new ArrayList<>(wanted);
        for (Integer index : order.subList(0, wanted)) {
            neighbors.add(treningData.instance(index));
        }
        return neighbors;
    }

    /**
     * Computes the Manhattan distance between two instances, leaving out the
     * decision attribute.
     *
     * @param a the first instance
     * @param b the second instance
     * @return the distance
     */
    private double manhattan(Instance a, Instance b) {
        double sum = 0;
        for (int i = 0; i < a.numAttributes(); i++) {
            if (i == numAtt) {
                continue;
            }
            sum += attributeDistance(a, b, i);
        }
        return sum;
    }

    /**
     * Computes the contribution of a single attribute to the distance.
     *
     * <p>Numeric attributes contribute the absolute difference of their
     * values. For any other attribute the textual values are compared as
     * integers when both of them parse as such. Everything else, including a
     * value missing on either side, contributes 0.
     *
     * @param a     the first instance
     * @param b     the second instance
     * @param index index of the attribute
     * @return the non-negative contribution of that attribute
     */
    private double attributeDistance(Instance a, Instance b, int index) {
        if (a.isMissing(index) || b.isMissing(index)) {
            return 0;
        }
        if (testData.attribute(index).type() == Attribute.NUMERIC) {
            return Math.abs(a.value(index) - b.value(index));
        }
        try {
            return Math.abs(Integer.parseInt(a.toString(index)) - Integer.parseInt(b.toString(index)));
        } catch (NumberFormatException ignored) {
            // The values are not integer-like, so no distance is defined for them.
            return 0;
        }
    }

    /**
     * Writes the test data to a file through an {@link ArffSaver}. An
     * {@link IOException} is only printed to standard error.
     *
     * @param fileName path under which the data is to be saved
     */
    private void saveData(String fileName) {
        try {
            ArffSaver save = new ArffSaver();
            save.setFile(new File(fileName));
            save.setInstances(testData);
            save.writeBatch();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}