List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:machinelearninglabs.OENaiveBayesClassifier.java
public double[][] attributeProbs(Instances data, int att) { int numberOfPossibleValuesForAttribute = data.firstInstance().attribute(att).numValues(); double[][] result = new double[data.numClasses()][numberOfPossibleValuesForAttribute]; // for each class for (Instance eachInstance : data) { double classValue = eachInstance.value(eachInstance.classIndex()); result[(int) classValue][(int) eachInstance.value(att)]++; }/*from w w w.j ava 2 s.c o m*/ // Get conditional probabilities ie probability that attribute = x given some class for (int i = 0; i < result.length; i++) { for (int j = 0; j < result[i].length; j++) { result[i][j] = (double) result[i][j] / classCount[i]; } } //printDoubleMatrix(result); return result; }
From source file:machinelearningq2.BasicNaiveBayesV1.java
/** * * This initial classifier will contain a two dimension array of counts * * @param ins/*from w w w. j a v a 2 s. com*/ * @throws Exception */ @Override public void buildClassifier(Instances ins) throws Exception { ins.setClassIndex(ins.numAttributes() - 1); countData = ins.size(); // assigns the class position of the instance classValueCounts = new int[ins.numClasses()]; System.out.println(ins); if (laplace == true) { laplaceCorrection(ins); } // store the values for (Instance line : ins) { double classValue = line.classValue(); classValueCounts[(int) classValue]++; for (int i = 0; i < line.numAttributes() - 1; i++) { double attributeValue = line.value(i); DataFound d = new DataFound(attributeValue, classValue, i); int index = data.indexOf(d); // then it doesn't exist if (index == -1) { data.add(d); } else { data.get(index).incrementCount(); } } } System.out.println(""); System.out.println(Arrays.toString(classValueCounts)); }
From source file:machinelearningq2.BasicNaiveBayesV1.java
/**
 * Performs Laplace correction so that no (attribute value, class) count is
 * zero: creating a DataFound entry ensures its count starts from 1 for every
 * combination of class value, attribute, and declared attribute value.
 *
 * @param inst training instances; the last attribute is used as the class
 * @throws ParseException if a nominal value label cannot be parsed as a number
 */
public void laplaceCorrection(Instances inst) throws ParseException {
    inst.setClassIndex(inst.numAttributes() - 1);
    for (int c = 0; c < inst.numClasses(); c++) {
        for (int j = 0; j < inst.numAttributes() - 1; j++) {
            // Fix: iterate over the attribute's declared values (numValues),
            // not the count of values observed in this sample
            // (numDistinctValues) — the two can differ, and value(i) indexes
            // the declared list. Using the observed count could skip exactly
            // the unseen values that Laplace smoothing exists to cover.
            for (int i = 0; i < inst.attribute(j).numValues(); i++) {
                String attributeValue = inst.attribute(j).value(i);
                // NOTE(review): NumberFormat.parse is locale-sensitive;
                // this only works when value labels are numeric strings.
                NumberFormat nf = NumberFormat.getInstance();
                double atval = nf.parse(attributeValue).doubleValue();
                // Fix: DataFound's third argument is the attribute index
                // everywhere else in this class (see buildClassifier); the
                // original passed the value index i, so smoothed entries
                // never matched real (value, class, attribute) triples.
                DataFound d = new DataFound(atval, c, j);
                data.add(d);
            }
        }
    }
}
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Builds either a discrete or a Gaussian naive Bayes model depending on the
 * user-selected mode.
 *
 * @param ins training instances; the last attribute is used as the class
 * @throws Exception if training fails
 */
@Override
public void buildClassifier(Instances ins) throws Exception {
    if ("d".equals(gausianOrDiscretise)) {
        // Discrete mode: discretize attributes and count value frequencies.
        buildDiscreteClassifier(ins);
    } else {
        // Gaussian mode: estimate per-class mean and spread per attribute.
        countData = ins.size();
        // assigns the class position of the instance
        ins.setClassIndex(ins.numAttributes() - 1);
        classValueCounts = new int[ins.numClasses()];
        attributeMeans = new double[ins.numClasses()][ins.numAttributes() - 1];
        attributeVariance = new double[ins.numClasses()][ins.numAttributes() - 1];
        // First pass: accumulate per-class attribute sums into attributeMeans
        // and record every (value, class, attribute) triple with its count.
        for (Instance line : ins) {
            double classValue = line.classValue();
            classValueCounts[(int) classValue]++;
            for (int i = 0; i < line.numAttributes() - 1; i++) {
                double attributeValue = line.value(i);
                attributeMeans[(int) classValue][i] += attributeValue;
                DataFound d = new DataFound(attributeValue, classValue, i);
                int index = data.indexOf(d);
                // index == -1 means this triple doesn't exist yet
                if (index == -1) {
                    data.add(d);
                } else {
                    data.get(index).incrementCount();
                }
            }
        }
        System.out.println("Attribute Totals: " + Arrays.deepToString(attributeMeans));
        // Compute the means: divide each per-class sum by that class's count.
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeMeans[j][i] = attributeMeans[j][i] / classValueCounts[j];
            }
        }
        // Accumulate squared differences from the mean.
        // NOTE(review): each distinct (value, class, attribute) triple
        // contributes its squared difference only ONCE here — the DataFound
        // occurrence count is ignored, so repeated values are underweighted.
        // Verify this is intentional before relying on these estimates.
        for (int i = 0; i < data.size(); i++) {
            double cv = data.get(i).getClassValue();
            double atIn = data.get(i).getAttributeIndex();
            double squareDifference = Math
                    .pow(data.get(i).getAttributeValue() - attributeMeans[(int) cv][(int) atIn], 2);
            attributeVariance[(int) cv][(int) atIn] += squareDifference;
        }
        // Normalize by (n - 1) and take the square root — attributeVariance
        // therefore ends up holding the per-class standard deviation,
        // despite its name.
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeVariance[j][i] = attributeVariance[j][i] / (classValueCounts[j] - 1);
                attributeVariance[j][i] = Math.sqrt(attributeVariance[j][i]);
            }
        }
        System.out.println("Attribute Means: " + Arrays.deepToString(attributeMeans));
        System.out.println("Variance: " + Arrays.deepToString(attributeVariance));
    }
}
From source file:machinelearningq2.ExtendedNaiveBayes.java
/** * The method buildDiscreteClassifier discretizes the data and then builds a * classifer// w w w .j av a 2 s. c o m * * @param instnc * @return * @throws Exception */ public void buildDiscreteClassifier(Instances ins) throws Exception { ins = discretize(ins); ins.setClassIndex(ins.numAttributes() - 1); countData = ins.size(); // assigns the class position of the instance classValueCounts = new int[ins.numClasses()]; // store the values for (Instance line : ins) { double classValue = line.classValue(); classValueCounts[(int) classValue]++; for (int i = 0; i < line.numAttributes() - 1; i++) { double attributeValue = line.value(i); DataFound d = new DataFound(attributeValue, classValue, i); int index = data.indexOf(d); // then it doesn't exist if (index == -1) { data.add(d); } else { data.get(index).incrementCount(); } } } }
From source file:machine_learing_clasifier.MyC45.java
/**
 * Tests whether collapsing this node's parent into a leaf improves accuracy
 * on a percentage split. If it does, the parent is left as a leaf predicting
 * the best class and true is returned; otherwise the parent's split attribute
 * is restored and false is returned.
 *
 * @param i     instances used to measure accuracy
 * @param order index of this node among the parent's successors (currently unused)
 * @return true if the parent was pruned to a leaf, false otherwise
 * @throws Exception if accuracy evaluation fails
 */
public boolean calculateErrorPrune(Instances i, int order) throws Exception {
    double before, after;
    // Accuracy of the tree as it currently stands.
    before = PercentageSplit.percentageSplitRate(i, head);
    //MyC45 temp = this.parent.m_Successors[order];
    // Temporarily turn the parent into a leaf by clearing its split attribute.
    Attribute temp = this.parent.m_Attribute;
    this.parent.m_Attribute = null;
    double maxafter = 0;
    double maxclass = -1;
    // Try every class value as the leaf's prediction; keep the most accurate.
    // NOTE(review): if no class yields accuracy > 0, maxclass stays -1 and is
    // still assigned to m_ClassValue below — confirm that case cannot occur.
    for (int x = 0; x < i.numClasses(); x++) {
        this.parent.m_ClassValue = (double) x;
        after = PercentageSplit.percentageSplitRate(i, head);
        if (after > maxafter) {
            maxclass = x;
            maxafter = after;
        }
    }
    this.parent.m_ClassValue = maxclass;
    //this.parent.m_Successors[order] = null;
    if (before >= maxafter) {
        // Pruning did not help: restore the split attribute, keep the subtree.
        this.parent.m_Attribute = temp;
        return false;
    } else {
        // Pruning helped: parent stays a leaf (m_Attribute remains null).
        System.out.println("prune!!!");
        return true;
    }
}
From source file:machine_learing_clasifier.MyC45.java
/**
 * Computes the entropy (in bits) of a dataset's class distribution.
 *
 * @param inst the dataset whose class entropy is measured
 * @return the entropy of the class distribution of inst
 */
public double computeEntropy(Instances inst) {
    double[] perClass = new double[inst.numClasses()];

    // Histogram of class labels.
    for (int idx = 0; idx < inst.numInstances(); idx++) {
        perClass[(int) inst.instance(idx).classValue()]++;
    }

    // entropy = -sum(p * log2 p), accumulated on raw counts and scaled
    // by the total instance count once at the end.
    double entropy = 0;
    for (double count : perClass) {
        if (count > 0) {
            entropy -= count * Utils.log2(count / inst.numInstances());
        }
    }
    return entropy / (double) inst.numInstances();
}
From source file:machine_learing_clasifier.MyC45.java
/**
 * Recursively grows the C4.5 tree rooted at this node. The attribute with
 * the highest information gain is chosen as the split (numeric attributes
 * are evaluated at their best threshold); when no attribute has positive
 * gain the node becomes a leaf predicting the majority class.
 *
 * @param data training instances for this node
 * @throws Exception if child construction fails
 */
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        return;
    }
    // Information gain of every attribute (the class attribute stays 0).
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            if (att.isNominal()) {
                infoGains[att.index()] = computeInformationGain(data, att);
            } else {
                // Numeric attribute: gain is computed at its best threshold.
                infoGains[att.index()] = computeInformationGainContinous(data, att,
                        BestContinousAttribute(data, att));
            }
        }
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    if (m_Attribute.isNumeric()) {
        // Recompute and store the threshold used for the numeric split below.
        numericAttThreshold = BestContinousAttribute(data, m_Attribute);
        System.out.println(" ini kalo continous dengan attribut : " + numericAttThreshold);
    }
    System.out.println("huhu = " + m_Attribute.toString());
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No attribute helps: make this a leaf with the majority class.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Partition the data on the chosen attribute and grow one child
        // per branch.
        Instances[] splitData;
        if (m_Attribute.isNominal()) {
            splitData = splitData(data, m_Attribute);
        } else {
            splitData = splitDataContinous(data, m_Attribute, numericAttThreshold);
        }
        if (m_Attribute.isNominal()) {
            System.out.println("nominal");
            // One child per declared nominal value.
            m_Successors = new MyC45[m_Attribute.numValues()];
            System.out.println(m_Successors.length);
            for (int j = 0; j < m_Attribute.numValues(); j++) {
                m_Successors[j] = new MyC45(head, this);
                m_Successors[j].buildClassifier(splitData[j]);
            }
        } else {
            System.out.println("numeric");
            // Numeric split: exactly two children around the threshold.
            m_Successors = new MyC45[2];
            System.out.println(m_Successors.length);
            for (int j = 0; j < 2; j++) {
                m_Successors[j] = new MyC45(head, this);
                m_Successors[j].buildClassifier(splitData[j]);
            }
        }
    }
}
From source file:machine_learing_clasifier.MyID3.java
/**
 * Recursively grows the ID3 tree rooted at this node. The attribute with
 * the highest information gain becomes the split; when no attribute has
 * positive gain the node becomes a leaf predicting the majority class.
 *
 * @param data training instances for this node
 * @throws Exception if child construction fails
 */
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        return;
    }
    // Information gain of every attribute (the class attribute stays 0).
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            infoGains[att.index()] = computeInformationGain(data, att);
        }
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    //System.out.println("huhu = " + m_Attribute.toString());
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No attribute helps: make this a leaf with the majority class.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Partition on the chosen nominal attribute: one child per value.
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new MyID3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyID3();
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:main.NaiveBayes.java
License:Open Source License
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // Remove instances with missing class; work on a copy so the
    // caller's dataset is left untouched.
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_NumClasses = instances.numClasses();

    // Copy the instances
    m_Instances = new Instances(instances);

    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }

    // Reserve space for the distributions: one estimator per
    // (attribute, class) pair, plus one for the class prior.
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);

    int attIndex = 0;
    Enumeration<Attribute> enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = enu.nextElement();

        // If the attribute is numeric, determine the estimator
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    // Assumes sort placed missing values at the end, so the
                    // first missing one terminates the scan — TODO confirm.
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    // Precision = average gap between adjacent distinct values.
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        // Pick an estimator per class according to the attribute's type.
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch (attribute.type()) {
            case Attribute.NUMERIC:
                if (m_UseKernelEstimator) {
                    m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                } else {
                    m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                }
                break;
            case Attribute.NOMINAL:
                m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                break;
            default:
                throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }

    // Compute counts
    Enumeration<Instance> enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = enumInsts.nextElement();
        updateClassifier(instance);
    }

    // Save space
    m_Instances = new Instances(m_Instances, 0);
}