Example usage for weka.core Instance classValue

Introduction

On this page you can find example usage of weka.core.Instance.classValue().

Prototype

public double classValue();

Document

Returns an instance's class value as a floating-point number.
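
The returned double is the zero-based index of the class label when the class attribute is nominal, and the target value itself when it is numeric. A minimal sketch of both cases (the ARFF path is a placeholder; any dataset with a class attribute works):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        Instance inst = data.instance(0);
        if (!inst.classIsMissing()) {
            double v = inst.classValue();
            if (data.classAttribute().isNominal()) {
                // nominal class: the double is a zero-based label index
                System.out.println("label: " + data.classAttribute().value((int) v));
            } else {
                // numeric class: the double is the regression target itself
                System.out.println("target: " + v);
            }
        }
    }
}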

Usage

From source file:WLSVM.java

License:Open Source License

/**
 * Converts an ARFF Instance into a string in the sparse format accepted by
 * LIBSVM.
 * 
 * @param instance the instance to convert
 * @return the instance as one line in LIBSVM sparse format
 */
protected String InstanceToSparse(Instance instance) {
    StringBuilder line = new StringBuilder();
    int c = (int) instance.classValue();
    if (c == 0)
        c = -1; // LIBSVM expects -1/+1 labels for binary problems
    line.append(c);
    for (int j = 1; j < instance.numAttributes(); j++) {
        if (j - 1 == instance.classIndex()) {
            continue; // skip the class attribute itself
        }
        if (instance.isMissing(j - 1))
            continue; // missing values are simply omitted from the sparse line
        if (instance.value(j - 1) != 0)
            line.append(" ").append(j).append(":").append(instance.value(j - 1));
    }
    return line.toString() + "\n";
}
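
A hypothetical driver for the helper above, assuming it lives in the same class, writes an entire dataset in this format, one line per instance:

import java.io.FileWriter;
import weka.core.Instances;

// hypothetical companion method to InstanceToSparse (same class assumed):
// writes every instance of a dataset as one LIBSVM sparse line
protected void writeSparse(Instances data, String path) throws Exception {
    try (FileWriter out = new FileWriter(path)) {
        for (int i = 0; i < data.numInstances(); i++) {
            out.write(InstanceToSparse(data.instance(i)));
        }
    }
}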

From source file:REPTree.java

License:Open Source License

/**
 * Builds classifier.
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}
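
The class-count loop above is the canonical classValue() idiom for nominal classes: the returned double is cast to int and used directly as an array index. The same pattern in isolation (a sketch, not part of REPTree):

import weka.core.Instance;
import weka.core.Instances;

// weighted class counts for a nominal class attribute
static double[] weightedClassCounts(Instances data) {
    double[] counts = new double[data.numClasses()];
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        counts[(int) inst.classValue()] += inst.weight(); // classValue() as index
    }
    return counts;
}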

From source file:Pair.java

License:Open Source License

/**
 * Sets the weights for the next iteration.
 */
protected double setWeights(Instances trainData, Classifier cls, double sourceFraction, int numSourceInstances,
        boolean isFinal) throws Exception {

    Enumeration enu = trainData.enumerateInstances();
    double[] errors = new double[trainData.numInstances()];
    double max = 0;
    int i = 0;
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue());
        if (i >= numSourceInstances && errors[i] > max)
            max = errors[i];
        i++;
    }

    if (max == 0)
        return -1;

    //get avg loss
    double loss = 0;
    double initialTWeightSum = 0;
    double allWeightSum = 0;
    for (int j = 0; j < errors.length; j++) {
        errors[j] /= max;
        Instance instance = trainData.instance(j);
        loss += instance.weight() * errors[j];
        if (j >= numSourceInstances) {
            //loss += instance.weight() * errors[j];
            initialTWeightSum += instance.weight();
        }
        allWeightSum += instance.weight();
    }
    //loss /= weightSum;
    loss /= allWeightSum;

    targetWeight = initialTWeightSum / allWeightSum;
    /*
    if (!isFinal){
    System.out.println("Target weight: " + targetWeight);
    System.out.println("max: " + max);
    System.out.println("avg error: " + loss * max);
    System.out.println("Loss: " + loss);
    }
    */

    double beta;

    if (fixedBeta)
        beta = 0.4 / 0.6;
    else {
        if (isFinal && loss > 0.499) //bad enough that we would normally quit (return -1),
            loss = 0.499; //but since we're doing CV, there is no reason to quit: clamp instead

        beta = loss / (1 - loss); //or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2;
    }

    double tWeightSum = 0;
    if (!isFinal) {
        //find b so that the total source weight comes out at sourceFraction * errors.length;
        //do binary search
        double goal = sourceFraction * errors.length;
        double bMin = .001;
        double bMax = .999;
        double b;
        double sourceSum = 0;
        while (bMax - bMin > .001) {
            b = (bMax + bMin) / 2;
            double sum = 0;
            for (int j = 0; j < numSourceInstances; j++) {
                Instance instance = trainData.instance(j);
                sum += Math.pow(b, errors[j]) * instance.weight();
            }
            if (sum > goal)
                bMax = b;
            else
                bMin = b;
        }
        // bMax - bMin has converged below .001, so bMin serves as the final b
        for (int j = 0; j < numSourceInstances; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * Math.pow(bMin, errors[j]));
            sourceSum += instance.weight();
        }

        //now adjust target weights
        goal = errors.length - sourceSum;
        double m = goal / initialTWeightSum;

        for (int j = numSourceInstances; j < errors.length; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * m);
        }
    } else {//final
        if (!doUpsource) { //modify only target weights
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }

            double weightSumInverse = initialTWeightSum / tWeightSum;
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        } else { //modify all weights
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }

            double weightSumInverse = errors.length / tWeightSum;
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        }

    }

    return beta;
}
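
For a numeric class, classValue() returns the regression target, so the per-instance loss above is just |prediction - classValue| scaled into [0, 1]. That error computation on its own (a sketch; the classifier and data are assumed to be already trained and loaded):

import weka.classifiers.Classifier;
import weka.core.Instances;

// normalized absolute errors, as used in AdaBoost.R2-style reweighting
static double[] normalizedErrors(Classifier cls, Instances data) throws Exception {
    double[] errors = new double[data.numInstances()];
    double max = 0;
    for (int i = 0; i < data.numInstances(); i++) {
        errors[i] = Math.abs(cls.classifyInstance(data.instance(i))
                - data.instance(i).classValue());
        max = Math.max(max, errors[i]);
    }
    if (max > 0) { // guard against a perfect classifier
        for (int i = 0; i < errors.length; i++) {
            errors[i] /= max;
        }
    }
    return errors;
}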

From source file:REPRandomTree.java

License:Open Source License

/**
 * Builds classifier.
 * 
 * @param data the data to train with
 * @throws Exception if building fails
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);

    m_zeroR = null;
    if (data.numAttributes() == 1) {
        m_zeroR = new ZeroR();
        m_zeroR.buildClassifier(data);
        return;
    }

    // Randomize and stratify
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(m_NumFolds);
    }

    // Split data into training and pruning set
    Instances train = null;
    Instances prune = null;
    if (!m_NoPruning) {
        train = data.trainCV(m_NumFolds, 0, random);
        prune = data.testCV(m_NumFolds, 0);
    } else {
        train = data;
    }

    // Create array of sorted indices and weights
    int[][][] sortedIndices = new int[1][train.numAttributes()][0];
    double[][][] weights = new double[1][train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
        if (j != train.classIndex()) {
            weights[0][j] = new double[train.numInstances()];
            if (train.attribute(j).isNominal()) {

                // Handling nominal attributes. Putting indices of
                // instances with missing values at the end.
                sortedIndices[0][j] = new int[train.numInstances()];
                int count = 0;
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (!inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    if (inst.isMissing(j)) {
                        sortedIndices[0][j][count] = i;
                        weights[0][j][count] = inst.weight();
                        count++;
                    }
                }
            } else {

                // Sorted indices are computed for numeric attributes
                for (int i = 0; i < train.numInstances(); i++) {
                    Instance inst = train.instance(i);
                    vals[i] = inst.value(j);
                }
                sortedIndices[0][j] = Utils.sort(vals);
                for (int i = 0; i < train.numInstances(); i++) {
                    weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight();
                }
            }
        }
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    double totalWeight = 0, totalSumSquared = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        if (data.classAttribute().isNominal()) {
            classProbs[(int) inst.classValue()] += inst.weight();
            totalWeight += inst.weight();
        } else {
            classProbs[0] += inst.classValue() * inst.weight();
            totalSumSquared += inst.classValue() * inst.classValue() * inst.weight();
            totalWeight += inst.weight();
        }
    }
    m_Tree = new Tree();
    double trainVariance = 0;
    if (data.classAttribute().isNumeric()) {
        trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight;
        classProbs[0] /= totalWeight;
    }

    // Build tree
    m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum,
            m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random);

    // Insert pruning data and perform reduced error pruning
    if (!m_NoPruning) {
        m_Tree.insertHoldOutSet(prune);
        m_Tree.reducedErrorPrune();
        m_Tree.backfitHoldOutSet();
    }
}
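
The numeric-class branch of the counting loop accumulates weighted sums of classValue() rather than indexing by it; from those sums the class mean and variance fall out directly. Isolated as a sketch, using the standard identity variance = E[x^2] - E[x]^2:

import weka.core.Instance;
import weka.core.Instances;

// weighted mean and variance of a numeric class from classValue()
static double[] classMeanAndVariance(Instances train) {
    double sum = 0, sumSquared = 0, totalWeight = 0;
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        sum += inst.classValue() * inst.weight();
        sumSquared += inst.classValue() * inst.classValue() * inst.weight();
        totalWeight += inst.weight();
    }
    double mean = sum / totalWeight;
    double variance = sumSquared / totalWeight - mean * mean;
    return new double[] { mean, variance };
}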

From source file:MultiClassClassifier.java

License:Open Source License

public double[][] calibratedDistributionForTestInstances(Instances test) throws Exception {
    double[][] binProbs = new double[m_Classifiers.length][test.numInstances()];
    double[][] calibratedProbs = new double[m_Classifiers.length][test.numInstances()];
    boolean[] target = new boolean[test.numInstances()];
    int prior1 = 0;
    int prior0 = 0;
    if (m_Classifiers.length == 1) {
        for (int i = 0; i < test.numInstances(); i++) {
            Instance inst = test.instance(i);
            //m_ClassFilters[0].input(inst);
            //m_ClassFilters[0].batchFinished();
            //Instance filteredInst = m_ClassFilters[i].output();

            //binProbs[0][i] = (200*m_Classifiers[0].distributionForInstance(inst)[1])-100;
            binProbs[0][i] = m_Classifiers[0].distributionForInstance(inst)[1];
            // single '=' is intentional: the comparison result is stored in target[i]
            if (target[i] = (inst.classValue() == 1.0))
                prior1++;
            else
                prior0++;
        }
        calibratedProbs[0] = sigTraining(binProbs[0], target, prior1, prior0);
        return calibratedProbs;
    } else {

        double[] probs = new double[test.classAttribute().numValues()];

        if (m_Method == METHOD_1_AGAINST_1) {
            throw new Exception("Not implemented for Method 1 against 1");
            /*double[][] r = new double[inst.numClasses()][inst.numClasses()];
            double[][] n = new double[inst.numClasses()][inst.numClasses()];
                    
            for(int i = 0; i < m_ClassFilters.length; i++) 
            {
               if (m_Classifiers[i] != null) {
                  Instance tempInst = (Instance)inst.copy(); 
                  tempInst.setDataset(m_TwoClassDataset);
                  double [] current = m_Classifiers[i].distributionForInstance(tempInst);  
                  Range range = new Range(((RemoveWithValues)m_ClassFilters[i])
                    .getNominalIndices());
                  range.setUpper(m_ClassAttribute.numValues());
                  int[] pair = range.getSelection();
                  if (m_pairwiseCoupling && inst.numClasses() > 2) {
                     r[pair[0]][pair[1]] = current[0];
                     n[pair[0]][pair[1]] = m_SumOfWeights[i];
                  }
                  else {
                     if (current[0] > current[1]) {
             probs[pair[0]] += 1.0;
                     }
                     else {
             probs[pair[1]] += 1.0;
                     }
                  }
               }
            }
            if (m_pairwiseCoupling && inst.numClasses() > 2) {
              return pairwiseCoupling(n, r);
            }*/
        } else {
            // error correcting style methods
            for (int i = 0; i < m_ClassFilters.length; i++) {
                prior1 = 0;
                prior0 = 0;
                for (int k = 0; k < test.numInstances(); k++) {
                    Instance inst = test.instance(k);
                    m_ClassFilters[i].input(inst);
                    m_ClassFilters[i].batchFinished();
                    Instance filteredInst = m_ClassFilters[i].output();
                    //binProbs[i][k] = (200*m_Classifiers[i].distributionForInstance(filteredInst)[1]) - 100;
                    binProbs[i][k] = m_Classifiers[i].distributionForInstance(filteredInst)[1];

                    //System.out.println(binProbs[i][k] + " " + inst.classValue());
                    //System.out.println("Class value: " + filteredInst.classValue() + " " + filteredInst.stringValue(filteredInst.numAttributes()-1) + " " + m_Classifiers[i].distributionForInstance(filteredInst)[0] + " " + m_Classifiers[i].distributionForInstance(filteredInst)[1]);
                    if (target[k] = (filteredInst.classValue() == 1.0))
                        prior1++;
                    else
                        prior0++;

                    /*for (int j = 0; j < m_ClassAttribute.numValues(); j++)
                    {
                       if (((MakeIndicator)m_ClassFilters[i]).getValueRange().isInRange(j))
                       {
                          binProbs[j] += current[1];
                       }
                       else 
                       {
                          binProbs[j] += current[0];
                       }
                    }*/
                }
                calibratedProbs[i] = sigTraining(binProbs[i], target, prior1, prior0);
            }
            /* for (int k = 0; k < test.numInstances(); k++) {
                  for (int i = 0; i < 3; i++)
                     System.out.println(i + " " + k + " cal: " + calibratedProbs[i][k] + " " + binProbs[i][k]);
               } */
        }
    }
    for (int i = 0; i < test.numInstances(); i++) {
        double sum = 0;
        for (int j = 0; j < m_Classifiers.length; j++) {
            sum += calibratedProbs[j][i];
        }
        for (int j = 0; j < m_Classifiers.length; j++)
            calibratedProbs[j][i] /= sum;
    }
    return calibratedProbs;
    /*
    if (Utils.gr(Utils.sum(probs), 0)) 
    {
      Utils.normalize(probs);
      return probs;
    }
    else {
       return m_ZeroR.distributionForInstance(inst);
    }*/
}
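
The target[i] = inst.classValue() == 1.0 pattern above builds the boolean labels that the sigmoid (Platt) fitting step consumes. Stated on its own (a sketch, assuming a two-class nominal class attribute where index 1 is the positive label):

import weka.core.Instances;

// boolean targets for Platt scaling: true where the class index is 1
static boolean[] binaryTargets(Instances data) {
    boolean[] target = new boolean[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
        target[i] = data.instance(i).classValue() == 1.0;
    }
    return target;
}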

From source file:GrowTree.java

public boolean homogeneous(Instances D) {
    distribution = new double[D.numClasses()];
    Enumeration eninst = D.enumerateInstances();
    while (eninst.hasMoreElements()) {
        Instance ele = (Instance) eninst.nextElement();
        distribution[(int) ele.classValue()]++;
    }

    // count how many distinct classes actually occur
    int cnt = 0;
    for (int i = 0; i < D.numClasses(); i++) {
        if (distribution[i] > 0)
            cnt++;
    }
    return cnt <= 1; // true only if all instances share a single class
}

From source file:GrowTree.java

/**
 * Returns the class value shared by the instances in D; only meaningful
 * once homogeneous(D) has confirmed the node is pure.
 */
double label(Instances D) {
    Enumeration eninst = D.enumerateInstances();
    // safe to read only the first instance: a homogeneous node has one class
    Instance ele = (Instance) eninst.nextElement();
    return ele.classValue();
}
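
Since label() reads only the first instance, a node must already be pure when it is called. A majority-vote variant (a sketch, not part of GrowTree) drops that requirement:

import java.util.Enumeration;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;

// majority class of a (possibly impure) node, again indexing by classValue()
static double majorityLabel(Instances D) {
    double[] dist = new double[D.numClasses()];
    Enumeration eninst = D.enumerateInstances();
    while (eninst.hasMoreElements()) {
        Instance ele = (Instance) eninst.nextElement();
        dist[(int) ele.classValue()]++;
    }
    return Utils.maxIndex(dist); // index of the most frequent class
}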

From source file:GrowTree.java

public double imp(Instances data) {
    double[] localdistribution = new double[data.numClasses()];
    Enumeration eninst = data.enumerateInstances();
    while (eninst.hasMoreElements()) {
        Instance ele = (Instance) eninst.nextElement();
        localdistribution[(int) ele.classValue()]++;
    }

    // The snippet originally returned an unset field; computing the Gini
    // impurity from the class distribution is one plausible completion.
    double imp = 1.0;
    for (int i = 0; i < data.numClasses(); i++) {
        double p = localdistribution[i] / data.numInstances();
        imp -= p * p;
    }
    return imp;
}

From source file:SMO.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Remove all instances with weight 0.
         MUST be done since condition (8) of Keerthi's paper
         is made with the assertion Ci > 0 (see equation (3a)). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
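
The line subsets[(int) inst.classValue()].add(inst) is the standard way to partition a dataset by class before pairwise (one-against-one) training. As a standalone sketch:

import weka.core.Instance;
import weka.core.Instances;

// split a dataset into one subset per class, indexed by classValue()
static Instances[] splitByClass(Instances insts) {
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify(); // trim unused capacity
    }
    return subsets;
}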

From source file:ID3Chi.java

License:Open Source License

private void MakeALeaf(Instances data) {

    data.deleteWithMissing(m_Attribute);

    if (data.numInstances() == 0) {
        SetNullDistribution(data);
        return;
    }

    m_Distribution = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        m_Distribution[(int) inst.classValue()]++;
    }
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();

    // set m_Attribute to null to mark this node as a leaf
    m_Attribute = null;
}
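
MakeALeaf turns a classValue() histogram into leaf statistics: Utils.normalize converts the counts to class probabilities and Utils.maxIndex picks the predicted class. The same three steps in isolation (a sketch; assumes data is non-empty):

import java.util.Enumeration;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;

// histogram -> probabilities -> majority-class prediction
static double leafPrediction(Instances data) {
    double[] dist = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        dist[(int) inst.classValue()]++;
    }
    Utils.normalize(dist);       // class probability estimates
    return Utils.maxIndex(dist); // predicted (majority) class index
}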