List of usage examples for weka.core.Instance.classValue()
public double classValue();
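The method returns an instance's class value as a double: for a nominal class attribute this is the zero-based index of the class label, not the label string; for a numeric class it is the target value itself. A minimal sketch of that behavior (assuming the Weka 3.7+ API, where DenseInstance implements the Instance interface; the dataset and attribute names are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class ClassValueDemo {
    public static void main(String[] args) {
        // Dataset with one numeric attribute and a nominal class {no, yes}
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("class", Arrays.asList("no", "yes")));
        Instances data = new Instances("demo", attrs, 0);
        data.setClassIndex(1);

        // Build an instance whose class label is "yes"
        double[] vals = { 3.5, data.classAttribute().indexOfValue("yes") };
        Instance inst = new DenseInstance(1.0, vals);
        inst.setDataset(data); // classValue() needs a dataset to know the class index

        double cv = inst.classValue();                        // 1.0 -- the index of "yes"
        String label = data.classAttribute().value((int) cv); // "yes"
        System.out.println(cv + " -> " + label);
    }
}

The examples below, collected from third-party source files, show the two typical patterns: casting classValue() to int to index per-class counts for nominal classes, and comparing it against predictions to measure regression error.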
From source file:WLSVM.java
License:Open Source License
/**
 * Converts an ARFF Instance into a string in the sparse format accepted by
 * LIBSVM.
 *
 * @param instance the instance to convert
 * @return the instance as a LIBSVM sparse-format line
 */
protected String InstanceToSparse(Instance instance) {
    int c = (int) instance.classValue();
    if (c == 0)
        c = -1; // LIBSVM uses -1/+1 rather than 0/1 for binary class labels
    String line = c + " ";
    for (int j = 1; j < instance.numAttributes(); j++) {
        if (j - 1 == instance.classIndex()) {
            continue;
        }
        if (instance.isMissing(j - 1))
            continue;
        if (instance.value(j - 1) != 0)
            line += " " + j + ":" + instance.value(j - 1);
    }
    return (line + "\n");
}
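For instance, with the class as the last attribute, an instance whose class index is 0 and whose first and third attributes are 0.5 and 2.0 would serialize to a line like -1 1:0.5 3:2.0 — zero-valued and missing attributes are omitted, and feature indices are shifted to LIBSVM's 1-based convention.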
From source file:REPTree.java
License:Open Source License
/**
 * Builds classifier.
 *
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0),
            m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}
From source file:Pair.java
License:Open Source License
/**
 * Sets the weights for the next iteration.
 */
protected double setWeights(Instances trainData, Classifier cls, double sourceFraction,
        int numSourceInstances, boolean isFinal) throws Exception {

    Enumeration enu = trainData.enumerateInstances();
    double[] errors = new double[trainData.numInstances()];
    double max = 0;
    int i = 0;
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue());
        if (i >= numSourceInstances && errors[i] > max)
            max = errors[i];
        i++;
    }

    if (max == 0)
        return -1;

    // get avg loss
    double loss = 0;
    double initialTWeightSum = 0;
    double allWeightSum = 0;
    for (int j = 0; j < errors.length; j++) {
        errors[j] /= max;
        Instance instance = trainData.instance(j);
        loss += instance.weight() * errors[j];
        if (j >= numSourceInstances) {
            initialTWeightSum += instance.weight();
        }
        allWeightSum += instance.weight();
    }
    loss /= allWeightSum;

    targetWeight = initialTWeightSum / allWeightSum;

    double beta;
    if (fixedBeta)
        beta = 0.4 / 0.6;
    else {
        if (isFinal && loss > 0.499)
            loss = 0.499; // since we're doing CV, cap the loss rather than quit
        beta = loss / (1 - loss);
        // or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2
    }

    double tWeightSum = 0;
    if (!isFinal) {
        // binary search for b so that the total source weight
        // becomes sourceFraction * errors.length
        double goal = sourceFraction * errors.length;
        double bMin = .001;
        double bMax = .999;
        double b;
        double sourceSum = 0;
        while (bMax - bMin > .001) {
            b = (bMax + bMin) / 2;
            double sum = 0;
            for (int j = 0; j < numSourceInstances; j++) {
                Instance instance = trainData.instance(j);
                sum += Math.pow(b, errors[j]) * instance.weight();
            }
            if (sum > goal)
                bMax = b;
            else
                bMin = b;
        }
        b = (bMax + bMin) / 2;
        // note: the reweighting below uses bMin, not the midpoint b
        for (int j = 0; j < numSourceInstances; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * Math.pow(bMin, errors[j]));
            sourceSum += instance.weight();
        }

        // now adjust target weights
        goal = errors.length - sourceSum;
        double m = goal / initialTWeightSum;
        for (int j = numSourceInstances; j < errors.length; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * m);
        }
    } else { // final
        if (!doUpsource) {
            // modify only target weights
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = initialTWeightSum / tWeightSum;
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        } else {
            // modify all weights
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = errors.length / tWeightSum;
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        }
    }

    return beta;
}
From source file:REPRandomTree.java
License:Open Source License
/**
 * Builds classifier.
 *
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0),
            m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}
From source file:MultiClassClassifier.java
License:Open Source License
public double[][] calibratedDistributionForTestInstances(Instances test) throws Exception {

    double[][] binProbs = new double[m_Classifiers.length][test.numInstances()];
    double[][] calibratedProbs = new double[m_Classifiers.length][test.numInstances()];
    boolean[] target = new boolean[test.numInstances()];
    int prior1 = 0;
    int prior0 = 0;

    if (m_Classifiers.length == 1) {
        for (int i = 0; i < test.numInstances(); i++) {
            Instance inst = test.instance(i);
            binProbs[0][i] = m_Classifiers[0].distributionForInstance(inst)[1];
            // assignment, not comparison: records whether the true class is 1
            // while branching on it
            if (target[i] = inst.classValue() == 1.0)
                prior1++;
            else
                prior0++;
        }
        calibratedProbs[0] = sigTraining(binProbs[0], target, prior1, prior0);
        return calibratedProbs;
    } else {
        if (m_Method == METHOD_1_AGAINST_1) {
            throw new Exception("Not implemented for Method 1 against 1");
        } else {
            // error correcting style methods
            for (int i = 0; i < m_ClassFilters.length; i++) {
                prior1 = 0;
                prior0 = 0;
                for (int k = 0; k < test.numInstances(); k++) {
                    Instance inst = test.instance(k);
                    m_ClassFilters[i].input(inst);
                    m_ClassFilters[i].batchFinished();
                    Instance filteredInst = m_ClassFilters[i].output();
                    binProbs[i][k] = m_Classifiers[i].distributionForInstance(filteredInst)[1];
                    if (target[k] = (filteredInst.classValue() == 1.0))
                        prior1++;
                    else
                        prior0++;
                }
                calibratedProbs[i] = sigTraining(binProbs[i], target, prior1, prior0);
            }
        }
    }

    // normalize the calibrated probabilities across binary classifiers
    for (int i = 0; i < test.numInstances(); i++) {
        double sum = 0;
        for (int j = 0; j < m_Classifiers.length; j++) {
            sum += calibratedProbs[j][i];
        }
        for (int j = 0; j < m_Classifiers.length; j++)
            calibratedProbs[j][i] /= sum;
    }
    return calibratedProbs;
}
From source file:GrowTree.java
public boolean homogeneous(Instances D) {
    distribution = new double[D.numClasses()];
    Enumeration eninst = D.enumerateInstances();
    while (eninst.hasMoreElements()) {
        Instance ele = (Instance) eninst.nextElement();
        distribution[(int) ele.classValue()]++;
    }
    int cnt = 0;
    for (int i = 0; i < D.numClasses(); i++) {
        if (distribution[i] > 0)
            cnt++;
    }
    // homogeneous if all instances belong to a single class
    return cnt <= 1;
}
From source file:GrowTree.java
// Returns the class value of the first instance; assumes D is non-empty and
// is only called on a homogeneous (single-class) subset.
double label(Instances D) {
    Enumeration eninst = D.enumerateInstances();
    Instance ele = (Instance) eninst.nextElement();
    return ele.classValue();
}
From source file:GrowTree.java
public double imp(Instances data) {
    double[] localDistribution = new double[data.numClasses()];
    Enumeration eninst = data.enumerateInstances();
    while (eninst.hasMoreElements()) {
        Instance ele = (Instance) eninst.nextElement();
        localDistribution[(int) ele.classValue()]++;
    }
    // The source returned an undefined value here; computing Gini impurity,
    // 1 - sum(p_i^2), from the class counts is one reasonable completion
    // (an assumption, not the original author's choice).
    if (data.numInstances() == 0)
        return 0;
    double gini = 1.0;
    for (int i = 0; i < data.numClasses(); i++) {
        double p = localDistribution[i] / data.numInstances();
        gini -= p * p;
    }
    return gini;
}
From source file:SMO.java
License:Open Source License
/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
           MUST be done since condition (8) of Keerthi's paper
           is made with the assertion Ci > 0 (see equation (3a)). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }
        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
From source file:ID3Chi.java
License:Open Source License
private void MakeALeaf(Instances data) {

    data.deleteWithMissing(m_Attribute);
    if (data.numInstances() == 0) {
        SetNullDistribution(data);
        return;
    }

    m_Distribution = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        m_Distribution[(int) inst.classValue()]++;
    }
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();

    // set m_Attribute to null to mark this node as a leaf
    m_Attribute = null;
}