List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:machinelearninglabs.OENaiveBayesClassifier.java
public double[][] attributeProbs(Instances data, int att) { int numberOfPossibleValuesForAttribute = data.firstInstance().attribute(att).numValues(); double[][] result = new double[data.numClasses()][numberOfPossibleValuesForAttribute]; // for each class for (Instance eachInstance : data) { double classValue = eachInstance.value(eachInstance.classIndex()); result[(int) classValue][(int) eachInstance.value(att)]++; }/*from w w w.j ava 2 s.c o m*/ // Get conditional probabilities ie probability that attribute = x given some class for (int i = 0; i < result.length; i++) { for (int j = 0; j < result[i].length; j++) { result[i][j] = (double) result[i][j] / classCount[i]; } } //printDoubleMatrix(result); return result; }
From source file:machinelearningq2.BasicNaiveBayesV1.java
/** * * This initial classifier will contain a two dimension array of counts * * @param ins/*from w w w. j a v a 2 s. com*/ * @throws Exception */ @Override public void buildClassifier(Instances ins) throws Exception { ins.setClassIndex(ins.numAttributes() - 1); countData = ins.size(); // assigns the class position of the instance classValueCounts = new int[ins.numClasses()]; System.out.println(ins); if (laplace == true) { laplaceCorrection(ins); } // store the values for (Instance line : ins) { double classValue = line.classValue(); classValueCounts[(int) classValue]++; for (int i = 0; i < line.numAttributes() - 1; i++) { double attributeValue = line.value(i); DataFound d = new DataFound(attributeValue, classValue, i); int index = data.indexOf(d); // then it doesn't exist if (index == -1) { data.add(d); } else { data.get(index).incrementCount(); } } } System.out.println(""); System.out.println(Arrays.toString(classValueCounts)); }
From source file:machinelearningq2.BasicNaiveBayesV1.java
/**
 * Performs Laplace correction so that no (attribute value, class) count is
 * zero: creating a DataFound entry ensures its count starts from 1 for every
 * combination of class value, attribute, and declared attribute value.
 *
 * @param inst training instances; the last attribute is used as the class
 * @throws ParseException if a nominal value label cannot be parsed as a number
 */
public void laplaceCorrection(Instances inst) throws ParseException {
    inst.setClassIndex(inst.numAttributes() - 1);
    for (int c = 0; c < inst.numClasses(); c++) {
        for (int j = 0; j < inst.numAttributes() - 1; j++) {
            // Fix: iterate over the attribute's declared values (numValues),
            // not the count of values observed in this sample
            // (numDistinctValues) — the two can differ, and value(i) indexes
            // the declared list. Using the observed count could skip exactly
            // the unseen values that Laplace smoothing exists to cover.
            for (int i = 0; i < inst.attribute(j).numValues(); i++) {
                String attributeValue = inst.attribute(j).value(i);
                // NOTE(review): NumberFormat.parse is locale-sensitive;
                // this only works when value labels are numeric strings.
                NumberFormat nf = NumberFormat.getInstance();
                double atval = nf.parse(attributeValue).doubleValue();
                // Fix: DataFound's third argument is the attribute index
                // everywhere else in this class (see buildClassifier); the
                // original passed the value index i, so smoothed entries
                // never matched real (value, class, attribute) triples.
                DataFound d = new DataFound(atval, c, j);
                data.add(d);
            }
        }
    }
}
From source file:machinelearningq2.ExtendedNaiveBayes.java
/**
 * Builds either a discrete or a Gaussian naive Bayes model depending on the
 * user-selected mode.
 *
 * @param ins training instances; the last attribute is used as the class
 * @throws Exception if training fails
 */
@Override
public void buildClassifier(Instances ins) throws Exception {
    if ("d".equals(gausianOrDiscretise)) {
        // Discrete mode: discretize attributes and count value frequencies.
        buildDiscreteClassifier(ins);
    } else {
        // Gaussian mode: estimate per-class mean and spread per attribute.
        countData = ins.size();
        // assigns the class position of the instance
        ins.setClassIndex(ins.numAttributes() - 1);
        classValueCounts = new int[ins.numClasses()];
        attributeMeans = new double[ins.numClasses()][ins.numAttributes() - 1];
        attributeVariance = new double[ins.numClasses()][ins.numAttributes() - 1];
        // First pass: accumulate per-class attribute sums into attributeMeans
        // and record every (value, class, attribute) triple with its count.
        for (Instance line : ins) {
            double classValue = line.classValue();
            classValueCounts[(int) classValue]++;
            for (int i = 0; i < line.numAttributes() - 1; i++) {
                double attributeValue = line.value(i);
                attributeMeans[(int) classValue][i] += attributeValue;
                DataFound d = new DataFound(attributeValue, classValue, i);
                int index = data.indexOf(d);
                // index == -1 means this triple doesn't exist yet
                if (index == -1) {
                    data.add(d);
                } else {
                    data.get(index).incrementCount();
                }
            }
        }
        System.out.println("Attribute Totals: " + Arrays.deepToString(attributeMeans));
        // Compute the means: divide each per-class sum by that class's count.
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeMeans[j][i] = attributeMeans[j][i] / classValueCounts[j];
            }
        }
        // Accumulate squared differences from the mean.
        // NOTE(review): each distinct (value, class, attribute) triple
        // contributes its squared difference only ONCE here — the DataFound
        // occurrence count is ignored, so repeated values are underweighted.
        // Verify this is intentional before relying on these estimates.
        for (int i = 0; i < data.size(); i++) {
            double cv = data.get(i).getClassValue();
            double atIn = data.get(i).getAttributeIndex();
            double squareDifference = Math
                    .pow(data.get(i).getAttributeValue() - attributeMeans[(int) cv][(int) atIn], 2);
            attributeVariance[(int) cv][(int) atIn] += squareDifference;
        }
        // Normalize by (n - 1) and take the square root — attributeVariance
        // therefore ends up holding the per-class standard deviation,
        // despite its name.
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeVariance[j][i] = attributeVariance[j][i] / (classValueCounts[j] - 1);
                attributeVariance[j][i] = Math.sqrt(attributeVariance[j][i]);
            }
        }
        System.out.println("Attribute Means: " + Arrays.deepToString(attributeMeans));
        System.out.println("Variance: " + Arrays.deepToString(attributeVariance));
    }
}
From source file:machinelearningq2.ExtendedNaiveBayes.java
/** * The method buildDiscreteClassifier discretizes the data and then builds a * classifer// w w w .j av a 2 s. c o m * * @param instnc * @return * @throws Exception */ public void buildDiscreteClassifier(Instances ins) throws Exception { ins = discretize(ins); ins.setClassIndex(ins.numAttributes() - 1); countData = ins.size(); // assigns the class position of the instance classValueCounts = new int[ins.numClasses()]; // store the values for (Instance line : ins) { double classValue = line.classValue(); classValueCounts[(int) classValue]++; for (int i = 0; i < line.numAttributes() - 1; i++) { double attributeValue = line.value(i); DataFound d = new DataFound(attributeValue, classValue, i); int index = data.indexOf(d); // then it doesn't exist if (index == -1) { data.add(d); } else { data.get(index).incrementCount(); } } } }
From source file:machine_learing_clasifier.MyC45.java
/**
 * Tests whether collapsing this node's parent into a leaf improves accuracy
 * on a percentage split. If it does, the parent is left as a leaf predicting
 * the best class and true is returned; otherwise the parent's split attribute
 * is restored and false is returned.
 *
 * @param i     instances used to measure accuracy
 * @param order index of this node among the parent's successors (currently unused)
 * @return true if the parent was pruned to a leaf, false otherwise
 * @throws Exception if accuracy evaluation fails
 */
public boolean calculateErrorPrune(Instances i, int order) throws Exception {
    double before, after;
    // Accuracy of the tree as it currently stands.
    before = PercentageSplit.percentageSplitRate(i, head);
    //MyC45 temp = this.parent.m_Successors[order];
    // Temporarily turn the parent into a leaf by clearing its split attribute.
    Attribute temp = this.parent.m_Attribute;
    this.parent.m_Attribute = null;
    double maxafter = 0;
    double maxclass = -1;
    // Try every class value as the leaf's prediction; keep the most accurate.
    // NOTE(review): if no class yields accuracy > 0, maxclass stays -1 and is
    // still assigned to m_ClassValue below — confirm that case cannot occur.
    for (int x = 0; x < i.numClasses(); x++) {
        this.parent.m_ClassValue = (double) x;
        after = PercentageSplit.percentageSplitRate(i, head);
        if (after > maxafter) {
            maxclass = x;
            maxafter = after;
        }
    }
    this.parent.m_ClassValue = maxclass;
    //this.parent.m_Successors[order] = null;
    if (before >= maxafter) {
        // Pruning did not help: restore the split attribute, keep the subtree.
        this.parent.m_Attribute = temp;
        return false;
    } else {
        // Pruning helped: parent stays a leaf (m_Attribute remains null).
        System.out.println("prune!!!");
        return true;
    }
}
From source file:machine_learing_clasifier.MyC45.java
/**
 * Computes the entropy (in bits) of a dataset's class distribution.
 *
 * @param inst the dataset whose class entropy is measured
 * @return the entropy of the class distribution of inst
 */
public double computeEntropy(Instances inst) {
    double[] perClass = new double[inst.numClasses()];

    // Histogram of class labels.
    for (int idx = 0; idx < inst.numInstances(); idx++) {
        perClass[(int) inst.instance(idx).classValue()]++;
    }

    // entropy = -sum(p * log2 p), accumulated on raw counts and scaled
    // by the total instance count once at the end.
    double entropy = 0;
    for (double count : perClass) {
        if (count > 0) {
            entropy -= count * Utils.log2(count / inst.numInstances());
        }
    }
    return entropy / (double) inst.numInstances();
}
From source file:machine_learing_clasifier.MyC45.java
/**
 * Recursively grows the C4.5 tree rooted at this node. The attribute with
 * the highest information gain is chosen as the split (numeric attributes
 * are evaluated at their best threshold); when no attribute has positive
 * gain the node becomes a leaf predicting the majority class.
 *
 * @param data training instances for this node
 * @throws Exception if child construction fails
 */
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        return;
    }
    // Information gain of every attribute (the class attribute stays 0).
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            if (att.isNominal()) {
                infoGains[att.index()] = computeInformationGain(data, att);
            } else {
                // Numeric attribute: gain is computed at its best threshold.
                infoGains[att.index()] = computeInformationGainContinous(data, att,
                        BestContinousAttribute(data, att));
            }
        }
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    if (m_Attribute.isNumeric()) {
        // Recompute and store the threshold used for the numeric split below.
        numericAttThreshold = BestContinousAttribute(data, m_Attribute);
        System.out.println(" ini kalo continous dengan attribut : " + numericAttThreshold);
    }
    System.out.println("huhu = " + m_Attribute.toString());
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No attribute helps: make this a leaf with the majority class.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Partition the data on the chosen attribute and grow one child
        // per branch.
        Instances[] splitData;
        if (m_Attribute.isNominal()) {
            splitData = splitData(data, m_Attribute);
        } else {
            splitData = splitDataContinous(data, m_Attribute, numericAttThreshold);
        }
        if (m_Attribute.isNominal()) {
            System.out.println("nominal");
            // One child per declared nominal value.
            m_Successors = new MyC45[m_Attribute.numValues()];
            System.out.println(m_Successors.length);
            for (int j = 0; j < m_Attribute.numValues(); j++) {
                m_Successors[j] = new MyC45(head, this);
                m_Successors[j].buildClassifier(splitData[j]);
            }
        } else {
            System.out.println("numeric");
            // Numeric split: exactly two children around the threshold.
            m_Successors = new MyC45[2];
            System.out.println(m_Successors.length);
            for (int j = 0; j < 2; j++) {
                m_Successors[j] = new MyC45(head, this);
                m_Successors[j].buildClassifier(splitData[j]);
            }
        }
    }
}
From source file:machine_learing_clasifier.MyID3.java
/**
 * Recursively grows the ID3 tree rooted at this node. The attribute with
 * the highest information gain becomes the split; when no attribute has
 * positive gain the node becomes a leaf predicting the majority class.
 *
 * @param data training instances for this node
 * @throws Exception if child construction fails
 */
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        return;
    }
    // Information gain of every attribute (the class attribute stays 0).
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            infoGains[att.index()] = computeInformationGain(data, att);
        }
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    //System.out.println("huhu = " + m_Attribute.toString());
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No attribute helps: make this a leaf with the majority class.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Partition on the chosen nominal attribute: one child per value.
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new MyID3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyID3();
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:main.NaiveBayes.java
License:Open Source License
/**
 * Generates the classifier.
 *
 * @param instances set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // Remove instances with missing class; work on a copy so the
    // caller's dataset is left untouched.
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_NumClasses = instances.numClasses();

    // Copy the instances
    m_Instances = new Instances(instances);

    // Discretize instances if required
    if (m_UseDiscretization) {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    } else {
        m_Disc = null;
    }

    // Reserve space for the distributions: one estimator per
    // (attribute, class) pair, plus one for the class prior.
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()];
    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true);

    int attIndex = 0;
    Enumeration<Attribute> enu = m_Instances.enumerateAttributes();
    while (enu.hasMoreElements()) {
        Attribute attribute = enu.nextElement();

        // If the attribute is numeric, determine the estimator
        // numeric precision from differences between adjacent values
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attribute.type() == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double lastVal = m_Instances.instance(0).value(attribute);
                double currentVal, deltaSum = 0;
                int distinct = 0;
                for (int i = 1; i < m_Instances.numInstances(); i++) {
                    Instance currentInst = m_Instances.instance(i);
                    // Assumes sort placed missing values at the end, so the
                    // first missing one terminates the scan — TODO confirm.
                    if (currentInst.isMissing(attribute)) {
                        break;
                    }
                    currentVal = currentInst.value(attribute);
                    if (currentVal != lastVal) {
                        deltaSum += currentVal - lastVal;
                        lastVal = currentVal;
                        distinct++;
                    }
                }
                if (distinct > 0) {
                    // Precision = average gap between adjacent distinct values.
                    numPrecision = deltaSum / distinct;
                }
            }
        }
        // Pick an estimator per class according to the attribute's type.
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            switch (attribute.type()) {
            case Attribute.NUMERIC:
                if (m_UseKernelEstimator) {
                    m_Distributions[attIndex][j] = new KernelEstimator(numPrecision);
                } else {
                    m_Distributions[attIndex][j] = new NormalEstimator(numPrecision);
                }
                break;
            case Attribute.NOMINAL:
                m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true);
                break;
            default:
                throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
        attIndex++;
    }

    // Compute counts
    Enumeration<Instance> enumInsts = m_Instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        Instance instance = enumInsts.nextElement();
        updateClassifier(instance);
    }

    // Save space
    m_Instances = new Instances(m_Instances, 0);
}