Example usage for weka.core Instance classValue

List of usage examples for weka.core.Instance.classValue()

Introduction

On this page you can find example usages of weka.core.Instance.classValue().

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:j48.Distribution.java

License:Open Source License

/**
 * Removes the weight of a single instance from the given bag, and from
 * the matching per-class and overall totals.
 *
 * @param bagIndex index of the bag the instance is taken out of
 * @param instance the instance whose weight is subtracted
 * @exception Exception if something goes wrong
 */
public final void sub(int bagIndex, Instance instance) throws Exception {

    final int cls = (int) instance.classValue();
    final double w = instance.weight();

    m_perClassPerBag[bagIndex][cls] -= w;
    m_perBag[bagIndex] -= w;
    m_perClass[cls] -= w;
    totaL -= w;
}

From source file:j48.Distribution.java

License:Open Source License

/**
 * Spreads every instance whose value for the given attribute is missing
 * over all bags, in proportion to the weight each bag held before this
 * call started.
 *
 * @param source the instances to scan
 * @param attIndex index of the attribute tested for missing values
 * @exception Exception if something goes wrong
 */
public final void addInstWithUnknown(Instances source, int attIndex) throws Exception {

    final int numBags = m_perBag.length;

    // Bag shares are fixed up front, before any count is modified below.
    final double[] share = new double[numBags];
    final boolean nothingCounted = Utils.eq(totaL, 0);
    for (int b = 0; b < numBags; b++) {
        // With no weight recorded yet, fall back to a uniform split.
        share[b] = nothingCounted ? 1.0 / share.length : m_perBag[b] / totaL;
    }

    for (Enumeration e = source.enumerateInstances(); e.hasMoreElements();) {
        final Instance current = (Instance) e.nextElement();
        if (!current.isMissing(attIndex)) {
            continue;
        }

        final int cls = (int) current.classValue();
        final double w = current.weight();
        m_perClass[cls] += w;
        totaL += w;

        for (int b = 0; b < numBags; b++) {
            final double portion = share[b] * w;
            m_perClassPerBag[b][cls] += portion;
            m_perBag[b] += portion;
        }
    }
}

From source file:j48.Distribution.java

License:Open Source License

/**
 * Adds the instances at positions {@code startIndex} (inclusive) to
 * {@code lastPlusOne} (exclusive) of the source to the given bag.
 *
 * @param bagIndex index of the bag receiving the instances
 * @param source the instances to read from
 * @param startIndex index of the first instance to add
 * @param lastPlusOne index one past the last instance to add
 * @exception Exception if something goes wrong
 */
public final void addRange(int bagIndex, Instances source, int startIndex, int lastPlusOne) throws Exception {

    // Accumulated separately and applied to the bag and grand total once at the end.
    double rangeWeight = 0;

    int pos = startIndex;
    while (pos < lastPlusOne) {
        final Instance current = (Instance) source.instance(pos);
        final int cls = (int) current.classValue();
        final double w = current.weight();

        rangeWeight = rangeWeight + w;
        m_perClassPerBag[bagIndex][cls] = m_perClassPerBag[bagIndex][cls] + w;
        m_perClass[cls] = m_perClass[cls] + w;
        pos++;
    }

    m_perBag[bagIndex] = m_perBag[bagIndex] + rangeWeight;
    totaL = totaL + rangeWeight;
}

From source file:j48.Distribution.java

License:Open Source License

/**
 * Adds one instance to every bag, scaling its weight for bag {@code i}
 * by {@code weights[i]}.
 *
 * @param instance the instance to add
 * @param weights one scaling factor per bag
 * @exception Exception if something goes wrong
 */
public final void addWeights(Instance instance, double[] weights) throws Exception {

    final int cls = (int) instance.classValue();

    for (int bag = 0; bag < m_perBag.length; bag++) {
        final double portion = instance.weight() * weights[bag];
        m_perClassPerBag[bag][cls] += portion;
        m_perBag[bag] += portion;
        m_perClass[cls] += portion;
        totaL += portion;
    }
}

From source file:j48.Distribution.java

License:Open Source License

/**
 * Deletes a single instance from the given bag by subtracting its weight
 * from the per-class-per-bag, per-bag, per-class and overall counts.
 *
 * @param bagIndex index of the bag the instance is deleted from
 * @param instance the instance to delete
 * @exception Exception if something goes wrong
 */
public final void del(int bagIndex, Instance instance) throws Exception {

    final int label = (int) instance.classValue();
    final double removed = instance.weight();

    m_perClassPerBag[bagIndex][label] -= removed;
    m_perBag[bagIndex] -= removed;
    m_perClass[label] -= removed;
    totaL -= removed;
}

From source file:j48.Distribution.java

License:Open Source License

/**
 * Deletes the instances at positions {@code startIndex} (inclusive) to
 * {@code lastPlusOne} (exclusive) of the source from the given bag.
 *
 * @param bagIndex index of the bag the instances are deleted from
 * @param source the instances to read from
 * @param startIndex index of the first instance to delete
 * @param lastPlusOne index one past the last instance to delete
 * @exception Exception if something goes wrong
 */
public final void delRange(int bagIndex, Instances source, int startIndex, int lastPlusOne) throws Exception {

    // Accumulated separately and removed from the bag and grand total once at the end.
    double rangeWeight = 0;

    int pos = startIndex;
    while (pos < lastPlusOne) {
        final Instance current = (Instance) source.instance(pos);
        final int cls = (int) current.classValue();
        final double w = current.weight();

        rangeWeight = rangeWeight + w;
        m_perClassPerBag[bagIndex][cls] = m_perClassPerBag[bagIndex][cls] - w;
        m_perClass[cls] = m_perClass[cls] - w;
        pos++;
    }

    m_perBag[bagIndex] = m_perBag[bagIndex] - rangeWeight;
    totaL = totaL - rangeWeight;
}

From source file:j48.Distribution.java

License:Open Source License

/**
 * Moves a single instance from one bag to another. Only the per-bag
 * counts are touched; the per-class and overall totals are not modified.
 *
 * @param from index of the bag the instance leaves
 * @param to index of the bag the instance enters
 * @param instance the instance to move
 * @exception Exception if something goes wrong
 */
public final void shift(int from, int to, Instance instance) throws Exception {

    final int cls = (int) instance.classValue();
    final double w = instance.weight();

    m_perClassPerBag[from][cls] = m_perClassPerBag[from][cls] - w;
    m_perClassPerBag[to][cls] = m_perClassPerBag[to][cls] + w;
    m_perBag[from] = m_perBag[from] - w;
    m_perBag[to] = m_perBag[to] + w;
}

From source file:j48.Distribution.java

License:Open Source License

/**
 * Moves the instances at positions {@code startIndex} (inclusive) to
 * {@code lastPlusOne} (exclusive) of the source from one bag to another.
 * Only the per-bag counts are touched.
 *
 * @param from index of the bag the instances leave
 * @param to index of the bag the instances enter
 * @param source the instances to read from
 * @param startIndex index of the first instance to move
 * @param lastPlusOne index one past the last instance to move
 * @exception Exception if something goes wrong
 */
public final void shiftRange(int from, int to, Instances source, int startIndex, int lastPlusOne)
        throws Exception {

    int pos = startIndex;
    while (pos < lastPlusOne) {
        final Instance current = (Instance) source.instance(pos);
        final int cls = (int) current.classValue();
        final double w = current.weight();

        m_perClassPerBag[from][cls] = m_perClassPerBag[from][cls] - w;
        m_perClassPerBag[to][cls] = m_perClassPerBag[to][cls] + w;
        m_perBag[from] = m_perBag[from] - w;
        m_perBag[to] = m_perBag[to] + w;
        pos++;
    }
}

From source file:kea.KEAFilter.java

License:Open Source License

/**
 * Converts a document instance into a vector of per-phrase instances.
 * <p>
 * One output instance is built for every phrase that {@code getPhrases}
 * collects from the document attribute, plus one placeholder instance for
 * every key found in the evaluation keyphrase map after the phrases have
 * been processed. The vector is then reordered by three successive stable
 * sorts (distance, negated TFxIDF, one minus probability) and every
 * instance receives a rank value.
 *
 * @param instance the input instance; its document attribute is read as a
 *        string and its keyphrases attribute is used when not missing
 * @param training passed through unchanged to {@code featVals}
 * @return a {@code FastVector} of {@code Instance} objects whose dataset is
 *         the current output format
 * @throws Exception not thrown directly here; presumably propagated from the
 *         classifier or helper calls
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    if (m_Debug) {
        System.err.println("-- Converting instance");
    }

    // Get the key phrases for the document
    // (both maps stay null when the keyphrases attribute is missing)
    HashMap hashKeyphrases = null;
    HashMap hashKeysEval = null;
    if (!instance.isMissing(m_KeyphrasesAtt)) {
        String keyphrases = instance.stringValue(m_KeyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        hashKeysEval = getGivenKeyphrases(keyphrases, true);
    }

    // Get the phrases for the document
    HashMap hash = new HashMap();
    int length = getPhrases(hash, instance.stringValue(m_DocumentAtt));

    // Compute number of extra attributes
    // (one more only when both m_Debug and m_KFused are set)
    int numFeatures = 5;
    if (m_Debug) {
        if (m_KFused) {
            numFeatures = numFeatures + 1;
        }
    }

    // Set indices of key attributes
    // These are positions in the output instance, relative to the document
    // attribute. probsAttIndex is assigned again inside the phrase loop below.
    int phraseAttIndex = m_DocumentAtt;
    int tfidfAttIndex = m_DocumentAtt + 2;
    int distAttIndex = m_DocumentAtt + 3;
    int probsAttIndex = m_DocumentAtt + numFeatures - 1;

    // Go through the phrases and convert them into instances
    Iterator it = hash.keySet().iterator();
    while (it.hasNext()) {
        String phrase = (String) it.next();
        FastVector phraseInfo = (FastVector) hash.get(phrase);
        double[] vals = featVals(phrase, phraseInfo, training, hashKeysEval, hashKeyphrases, length);
        // Feature-only instance, used solely to query the classifier
        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(m_ClassifierData);

        // Get probability of phrase being key phrase
        // NOTE(review): index 1 is presumably the "is keyphrase" class; confirm against the class attribute
        double[] probs = m_Classifier.distributionForInstance(inst);
        double prob = probs[1];

        // Compute attribute values for final instance
        // The document attribute expands into several output slots; every
        // other input attribute maps to exactly one slot.
        double[] newInst = new double[instance.numAttributes() + numFeatures];
        int pos = 0;
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (i == m_DocumentAtt) {

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                newInst[pos++] = index;

                // Add original version
                index = outputFormatPeek().attribute(pos).addStringValue((String) phraseInfo.elementAt(2));
                newInst[pos++] = index;

                // Add TFxIDF
                newInst[pos++] = inst.value(m_TfidfIndex);

                // Add distance
                newInst[pos++] = inst.value(m_FirstOccurIndex);

                // Add other features
                if (m_Debug) {
                    if (m_KFused) {
                        newInst[pos++] = inst.value(m_KeyFreqIndex);
                    }
                }

                // Add probability
                // The slot actually written is remembered here and reused by
                // the sorting and ranking code further down.
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();
            } else if (i == m_KeyphrasesAtt) {
                // Keyphrases slot carries the class value of the feature instance
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }
        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
    }

    // Add dummy instances for keyphrases that don't occur
    // in the document
    // NOTE(review): every key left in hashKeysEval gets a dummy here; presumably
    // featVals removes the keys it matched - confirm.
    if (hashKeysEval != null) {
        Iterator phrases = hashKeysEval.keySet().iterator();
        while (phrases.hasNext()) {
            String phrase = (String) phrases.next();
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == m_DocumentAtt) {

                    // Add phrase
                    int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;

                    // Add original version
                    index = outputFormatPeek().attribute(pos).addStringValue((String) hashKeysEval.get(phrase));
                    newInst[pos++] = (double) index;

                    // Add TFxIDF
                    newInst[pos++] = Instance.missingValue();

                    // Add distance
                    newInst[pos++] = Instance.missingValue();

                    // Add other features
                    if (m_Debug) {
                        if (m_KFused) {
                            newInst[pos++] = Instance.missingValue();
                        }
                    }

                    // Add probability and rank
                    // The hugely negative probability makes the sort key
                    // (1 - probability) exceed 1, so the ranking loop below
                    // takes its short cut for these dummies.
                    newInst[pos++] = -Double.MAX_VALUE;
                    newInst[pos++] = Instance.missingValue();
                } else if (i == m_KeyphrasesAtt) {
                    newInst[pos++] = 1; // Keyphrase
                } else {
                    newInst[pos++] = instance.value(i);
                }
            }
            Instance inst = new Instance(instance.weight(), newInst);
            inst.setDataset(outputFormatPeek());
            vector.addElement(inst);
        }
    }

    // The three stable sorts below run from least to most significant key,
    // so the last one (probability) decides the primary order and the
    // earlier ones only break ties.

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    // (values are negated before sorting)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    // (sort key is 1 - probability)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and set their rank to Integer.MAX_VALUE.
    // NOTE(review): an earlier version of this comment also mentioned assigning
    // probability -1; the code below only changes the rank slot.
    // NOTE(review): vals still holds the keys computed before the last reordering,
    // while vector is now in sorted order, so vals[i] is not necessarily the key
    // of vector element i - confirm the short cut below tests the intended phrase.
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);

        // Short cut: if phrase very unlikely make rank very low and continue
        // (the rank slot is the one right after the probability slot)
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current phrase.
        // startInd ends up one past the last instance tied with the current one.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        String val = currentInstance.stringValue(phraseAttIndex);
        boolean foundSuperphrase = false;
        // Scan every earlier instance and every tied one, skipping the current instance itself
        for (int j = startInd - 1; j >= 0; j--) {
            if (j != i) {
                Instance candidate = (Instance) vector.elementAt(j);
                String potSuperphrase = candidate.stringValue(phraseAttIndex);
                // Only a phrase at least as long as the current one is tested
                if (val.length() <= potSuperphrase.length()) {
                    if (KEAFilter.contains(val, potSuperphrase)) {
                        foundSuperphrase = true;
                        break;
                    }
                }
            }
        }
        // Ranks are consecutive over the phrases that survive both checks
        if (foundSuperphrase) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
        } else {
            currentInstance.setValue(probsAttIndex + 1, rank++);
        }
    }
    return vector;
}

From source file:LogReg.Logistic.java

License:Open Source License

/**
 * Builds the classifier.
 *
 * @param train the training data to be used for generating the
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances train) throws Exception {
    // Overview: filter the training data, copy it into a weighted and
    // standardised matrix, minimise the objective with OptEng, then map the
    // fitted coefficients back to the original attribute scale in m_Par.

    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class
    // (done on a new Instances object built from the argument)
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values   
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed 
    int nR = m_NumPredictors = train.numAttributes() - 1; // Every attribute except the class
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's
    double[] sY = new double[nK + 1]; // Number of instances per class
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];

        // Column 0 is a constant 1; predictors fill columns 1..nR
        m_Data[i][0] = 1;
        int j = 1;
        // k walks over all attributes, j over the matrix columns; the class
        // attribute is skipped
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                // Weighted sums for now; turned into mean and standard
                // deviation after this loop
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }

        // Class count
        // (each instance counts as 1 here, regardless of its weight)
        sY[Y[i]]++;
    }

    // NOTE(review): the condition also rejects a total weight of exactly 1,
    // although the message says "less than 1".
    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    // Column 0 gets mean 0 and SD 1 so the normalisation below leaves it unchanged
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable     Avg       SD    ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data 
    // (columns with zero standard deviation are left as they are)
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    // Flat parameter vector: the value for class p and column q sits at
    // index p * (nR + 1) + q
    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        // Column-0 start value: log ratio of (count + 1) for class p against the last class
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        // A null result is treated as "not finished": restart from the
        // optimiser's current values until a non-null result comes back
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough, but use the current value
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    // Each coefficient is divided by its column's SD, and row 0 of m_Par is
    // reduced by coefficient * mean to undo the centring.
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}