List of usage examples for the classValue() method of weka.core.Instance
/**
 * Signature of the method whose usages are listed below.
 *
 * NOTE(review): per the page title this is weka.core.Instance#classValue();
 * the examples cast the result to int and use it as an index into per-class
 * arrays, so for nominal classes it is presumably the index of the class
 * label -- confirm against the Weka Javadoc.
 *
 * @return the class value of the instance, as a double
 */
public double classValue();
From source file:myid3andc45classifier.Model.MyC45.java
public void makeMyC45Tree(Instances data) throws Exception { if (data.numInstances() == 0) { attribute = null;/*from w w w .j a v a 2 s. co m*/ label = Instance.missingValue(); return; } //System.out.println("NEW"); double[] infoGainRatios = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); if (!att.isNumeric()) infoGainRatios[att.index()] = computeInfoGainRatio(data, att); else infoGainRatios[att.index()] = Double.NEGATIVE_INFINITY; //System.out.println(att.name() + " " + infoGainRatios[att.index()]); } // TODO: build the tree attribute = data.attribute(maxIndex(infoGainRatios)); //System.out.println(infoGainRatios[maxIndex(infoGainRatios)]); // Make leaf if information gain is zero. // Otherwise create successors. if (infoGainRatios[maxIndex(infoGainRatios)] <= epsilon || Double.isNaN(infoGainRatios[maxIndex(infoGainRatios)])) { attribute = null; double[] numClasses = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); numClasses[(int) inst.classValue()]++; } label = maxIndex(numClasses); classAttribute = data.classAttribute(); } else { classAttribute = data.classAttribute(); Instances[] splitData = splitInstancesByAttribute(data, attribute); Instances[] distrData = splitInstancesByAttribute(data, data.classAttribute()); distribution = new double[distrData.length]; for (int j = 0; j < distribution.length; j++) { distribution[j] = distrData[j].numInstances(); } successors = new MyC45[attribute.numValues()]; for (int j = 0; j < attribute.numValues(); j++) { successors[j] = new MyC45(); successors[j].buildClassifier(splitData[j]); } } // TODO: prune //pruneTree(data); }
From source file:myid3andc45classifier.Model.MyC45.java
public double[] listClassCountsValues(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; //array untuk menyimpan value kelas sesuai jumlah kelas Enumeration instanceEnum = data.enumerateInstances(); //Masukkan data ke array while (instanceEnum.hasMoreElements()) { Instance inst = (Instance) instanceEnum.nextElement(); classCounts[(int) inst.classValue()]++; }/* w w w. j av a2 s. c o m*/ return classCounts; }
From source file:myid3andc45classifier.Model.MyC45.java
/**
 * Tells whether this classifier's prediction for {@code instance} matches the
 * class value stored on the instance.
 *
 * @param instance the instance to check
 * @return {@code true} if {@code isDoubleEqual} considers the stored class
 *         value and the predicted value equal
 */
public boolean checkInstance(Instance instance) {
    final double actual = instance.classValue();
    final double predicted = classifyInstance(instance);
    return isDoubleEqual(actual, predicted);
}
From source file:myid3andc45classifier.Model.MyID3.java
public void makeMyID3Tree(Instances data) throws Exception { // Mengecek apakah tidak terdapat instance yang dalam node ini if (data.numInstances() == 0) { attribute = null;/*from w w w . j a v a 2 s. c o m*/ classValue = Instance.missingValue(); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } attribute = data.attribute(maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (isDoubleEqual(infoGains[attribute.index()], 0)) { attribute = null; double[] numClasses = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); numClasses[(int) inst.classValue()]++; } label = maxIndex(numClasses); classAttribute = data.classAttribute(); } else { Instances[] splitData = splitInstancesByAttribute(data, attribute); successors = new MyID3[attribute.numValues()]; for (int j = 0; j < attribute.numValues(); j++) { successors[j] = new MyID3(); successors[j].buildClassifier(splitData[j]); } } }
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance// w ww. j a va 2s. c om */ public void buildTree(Instances data) throws Exception { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyJ48[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyJ48(this); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance// www. jav a2 s.c o m */ public void pruneTree2(Instances data) throws Exception { if (currentAttribute == null) { Attribute tempAttr = predecessor.currentAttribute; predecessor.currentAttribute = null; // Set the class value as the highest frequency of the class classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); predecessor.classValue = Utils.maxIndex(classDistribution); predecessor.classAttribute = data.classAttribute(); Weka weka = new Weka(); weka.setTraining("weather.nominal.arff"); String[] options_cl = { "" }; weka.setClassifier("myJ48.MyJ48", options_cl); weka.runCV(true); double currentAccuracy = weka.getM_Evaluation().correct(); double maxFalseAccuracy = initAccuracy * 0.9; if (maxFalseAccuracy > currentAccuracy) { predecessor.currentAttribute = tempAttr; visited = true; } else { visited = false; } } else if (visited) { } else { for (int j = 0; j < currentAttribute.numValues(); j++) { if (nodes[j] == null) { //System.out.println("null nodes"); } else { //System.out.println("ga null"); } nodes[j].pruneTree(data); } } }
From source file:myJ48.MyJ48.java
public MyJ48 pruneTree(Instances data) throws Exception { if (currentAttribute == null) { return this; } else {//from w w w .jav a2s . co m } if (currentAttribute != null) { for (int i = 0; i < currentAttribute.numValues(); i++) { boolean succLeaf = true; if (nodes[i].currentAttribute != null) { for (int j = 0; j < nodes[i].currentAttribute.numValues(); j++) { succLeaf = (succLeaf && (nodes[i].nodes[j].currentAttribute == null)); } if (succLeaf) { Attribute tempAttr = nodes[i].currentAttribute; nodes[i].currentAttribute = null; // Set the class value as the highest frequency of the class classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); nodes[i].classValue = Utils.maxIndex(classDistribution); nodes[i].classAttribute = data.classAttribute(); /*Weka weka = new Weka(); weka.setTraining("weather.nominal.arff"); String[] options_cl = {""}; weka.setClassifier("myJ48.MyJ48", options_cl); weka.runCV(true); double currentAccuracy = weka.getM_Evaluation().correct();*/ Random rand = new Random(); double currentAccuracy = rand.nextDouble(); System.out.println("acc kepake : " + currentAccuracy); double maxFalseAccuracy = 0.7; // coba coba if (maxFalseAccuracy > currentAccuracy) { nodes[i].currentAttribute = tempAttr; //visited = true; } else { //visited = false; } } } else { nodes[i] = nodes[i].pruneTree(data); } } } return this; }
From source file:NaiveBayes.NaiveBayes13514004.java
@Override public void buildClassifier(Instances i) { //Algoritma//from ww w . j av a 2s . c o m origin = new Instances(i); //Menghitung jumlah attribute dan kelas numAtt = i.numAttributes() - 1; numClass = i.numClasses(); //Inisialisasi matrix 3 dimensi data = new int[numAtt][numClass][0]; prob = new double[numAtt][numClass][0]; kelasdata = new int[numClass]; kelasprob = new double[numClass]; Enumeration<Instance> enu1 = i.enumerateInstances(); while (enu1.hasMoreElements()) { Instance ins = enu1.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); numDis = att.numValues(); data[x][(int) ins.classValue()] = new int[numDis]; prob[x][(int) ins.classValue()] = new double[numDis]; x++; } } //Mengisi matriks Frekuensi Enumeration<Instance> enu2 = i.enumerateInstances(); while (enu2.hasMoreElements()) { Instance ins = enu2.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); data[x][(int) ins.classValue()][(int) ins.value(att)]++; x++; } kelasdata[(int) ins.classValue()]++; } //Menghitung probabilitas kelas double numInstances = (double) i.numInstances(); for (int y = 0; y < numClass; y++) { kelasprob[y] = (double) kelasdata[y] / numInstances; } //Mengisi matriks probabilitas Enumeration<Instance> enu3 = i.enumerateInstances(); while (enu3.hasMoreElements()) { Instance ins = enu3.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); int sumDis = Utils.sum(data[x][(int) ins.classValue()]); numDis = att.numValues(); for (int z = 0; z < numDis; z++) { int y = (int) ins.classValue(); prob[x][y][z] = ((double) data[x][y][z] / (double) sumDis); } x++; } } }
From source file:naivebayes.NBTubesAI.java
@Override public void buildClassifier(Instances data) throws Exception { distribution = new HashMap<>(); classCount = new HashMap<>(); data = new Instances(data); //Delete data tanpa kelas data.deleteWithMissingClass();//w w w.ja v a 2s . com //melakukan filter discretize untuk mengubah atribut menjadi nominal //menghitung jumlah instance m_Instances = new Instances(data); numInstance = data.numInstances(); //Enumerasi seluruh atribut instances Enumeration<Attribute> enumAttr = m_Instances.enumerateAttributes(); //Index attribut saat ini int attrIndex = 0; //Hashmap untuk menghitung jumlah kemunculan kelas yang bersesuaian for (int i = 0; i < m_Instances.classAttribute().numValues(); i++) { classCount.put(i + 0.0, 0); } Enumeration<Instance> forCount = m_Instances.enumerateInstances(); while (forCount.hasMoreElements()) { Instance instCount = forCount.nextElement(); classCount.put(instCount.classValue(), classCount.get(instCount.classValue()) + 1); } System.out.println("JMLAH KELAS:" + m_Instances.numClasses()); System.out.println(classCount.toString()); //Looping untuk seluruh atribut while (enumAttr.hasMoreElements()) { Attribute temp = enumAttr.nextElement(); //nama attribute String attrName = temp.name(); //Memasukkan kunci attrName if (distribution.get(attrName) == null) { distribution.put(attrName, new HashMap<String, HashMap<Double, Double>>()); } //Enumerasi dari seluruh instance pada Instances masukan Enumeration<Instance> enumInst = m_Instances.enumerateInstances(); //Looping untuk seluruh instance while (enumInst.hasMoreElements()) { //Mengambil Instance selanjutnya Instance tempInst = enumInst.nextElement(); //Nilai domain untuk atribut saat ini String nilaiDomain = tempInst.stringValue(temp); //Class dari instance ini double classAttr = tempInst.classValue(); if (distribution.get(attrName).get(nilaiDomain) == null) { //Membuat hashmap baru jika domainNilai pertama kali muncul distribution.get(attrName).put(nilaiDomain, new HashMap<Double, Double>()); } 
if (distribution.get(attrName).get(nilaiDomain).get(classAttr) == null) { //Membuat hashmap baru jika untuk pasangan domain nilai dan //kelas ini baru pertama kali muncul for (int i = 0; i < m_Instances.numClasses(); i++) { distribution.get(attrName).get(nilaiDomain).put(i + 0.0, 0.0); } } //Menambahkan frekuensi kemunculan +1 distribution.get(attrName).get(nilaiDomain).put(classAttr, distribution.get(attrName).get(nilaiDomain).get(classAttr) + (1.0 / classCount.get(classAttr))); } attrIndex++; } System.out.println(distribution.toString()); System.out.println(classCount.toString()); }
From source file:net.paudan.evosvm.LibLINEAR.java
License:Open Source License
/** * builds the classifier//from w w w. j a va 2 s . co m * * @param insts the training instances * @throws Exception if liblinear classes not in classpath or liblinear * encountered a problem */ public void buildClassifier(Instances insts) throws Exception { m_NominalToBinary = null; m_Filter = null; // remove instances with missing class insts = new Instances(insts); insts.deleteWithMissingClass(); if (!getDoNotReplaceMissingValues()) { m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(insts); insts = Filter.useFilter(insts, m_ReplaceMissingValues); } // can classifier handle the data? // we check this here so that if the user turns off // replace missing values filtering, it will fail // if the data actually does have missing values getCapabilities().testWithFail(insts); if (getConvertNominalToBinary()) { insts = nominalToBinary(insts); } if (getNormalize()) { m_Filter = new Normalize(); m_Filter.setInputFormat(insts); insts = Filter.useFilter(insts, m_Filter); } int[] vy = new int[insts.numInstances()]; FeatureNode[][] vx = new FeatureNode[insts.numInstances()][]; int max_index = 0; for (int d = 0; d < insts.numInstances(); d++) { Instance inst = insts.instance(d); FeatureNode[] x = instanceToArray(inst); if (x.length > 0) { max_index = Math.max(max_index, x[x.length - 1].index); } vx[d] = x; double classValue = inst.classValue(); int classValueInt = (int) classValue; if (classValueInt != classValue) throw new RuntimeException("unsupported class value: " + classValue); vy[d] = classValueInt; } if (!m_Debug) { Linear.disableDebugOutput(); } else { Linear.enableDebugOutput(); } // reset the PRNG for regression-stable results Linear.resetRandom(); // train model m_Model = Linear.train(getProblem(vx, vy, max_index), getParameters()); }