Example usage for weka.core Instance classValue

Introduction

In this page you can find the example usage for weka.core Instance classValue.

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:cezeri.feature.selection.FeatureSelectionInfluence.java

private static double[] getAttributeValues(Instances test) {
    int n = test.numInstances();
    double[] ret = new double[n];
    for (int i = 0; i < n; i++) {
        Instance ins = test.instance(i);
        ret[i] = ins.classValue();
    }// w  ww . j av  a 2s.  c o  m
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

/**
 *
 * @param m tm dataset/*from  ww  w.ja  v  a2 s . c om*/
 * @param val class value deeri val olanlar filtrele
 * @return
 */
public static Instances[] getSpecificInstancesBasedOnClassValue(Instances m, String[] cl) {
    Instances[] ret = new Instances[cl.length];
    for (int i = 0; i < ret.length; i++) {
        ret[i] = FactoryInstance.generateInstances(m.relationName() + "_class=" + cl[i], m.numAttributes());
        //            ret[i] = m.resampleWithWeights(new Random());
        ret[i].delete();
    }
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        for (int j = 0; j < cl.length; j++) {
            if (("" + (int) ins.classValue()).equals(cl[j])) {
                ret[j].add(ins);
            }
        }
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static double[] getClassData(Instances m) {
    double[] ret = new double[m.numInstances()];
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        ret[i] = ins.classValue();
    }//from  w ww  .j  ava  2 s.  co  m
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static double[] getClassData(Instances m, int val) {
    Vector v = new Vector();
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        if ((int) ins.classValue() == val) {
            v.add(ins.classValue());/*from   w  w w  .  j a va 2  s.c  om*/
        }
    }
    double[] ret = FactoryUtils.toDoubleArray(v);
    return ret;
}

From source file:CGLSMethod.LinearRegression.java

License:Open Source License

/**
 * Calculate a linear regression using the selected attributes
 *
 * @param selectedAttributes an array of booleans where each element
 * is true if the corresponding attribute should be included in the
 * regression.// w w w.  ja  v a 2s  .  co  m
 * @return an array of coefficients for the linear regression model.
 * @throws Exception if an error occurred during the regression.
 */
private double[] doRegression(boolean[] selectedAttributes) throws Exception {

    if (b_Debug) {
        System.out.print("doRegression(");
        for (int i = 0; i < selectedAttributes.length; i++) {
            System.out.print(" " + selectedAttributes[i]);
        }
        System.out.println(" )");
    }
    int numAttributes = 0;
    for (int i = 0; i < selectedAttributes.length; i++) {
        if (selectedAttributes[i]) {
            numAttributes++;
        }
    }

    // Check whether there are still attributes left
    Matrix independent = null, dependent = null;
    double[] weights = null;
    if (numAttributes > 0) {
        independent = new Matrix(m_TransformedData.numInstances(), numAttributes);
        dependent = new Matrix(m_TransformedData.numInstances(), 1);
        for (int i = 0; i < m_TransformedData.numInstances(); i++) {
            Instance inst = m_TransformedData.instance(i);
            int column = 0;
            for (int j = 0; j < m_TransformedData.numAttributes(); j++) {
                if (j == m_ClassIndex) {
                    dependent.setElement(i, 0, inst.classValue());
                } else {
                    if (selectedAttributes[j]) {
                        double value = inst.value(j) - m_Means[j];

                        // We only need to do this if we want to
                        // scale the input
                        if (!m_checksTurnedOff) {
                            value /= m_StdDevs[j];
                        }
                        independent.setElement(i, column, value);
                        column++;
                    }
                }
            }
        }

        // Grab instance weights
        weights = new double[m_TransformedData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = m_TransformedData.instance(i).weight();
        }
    }

    // Compute coefficients (note that we have to treat the
    // intercept separately so that it doesn't get affected
    // by the ridge constant.)
    double[] coefficients = new double[numAttributes + 1];
    if (numAttributes > 0) {
        double[] coeffsWithoutIntercept = independent.regression(dependent, weights, m_Ridge);
        System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes);
    }
    coefficients[numAttributes] = m_ClassMean;

    // Convert coefficients into original scale
    int column = 0;
    for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
        if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) {

            // We only need to do this if we have scaled the
            // input.
            if (!m_checksTurnedOff) {
                coefficients[column] /= m_StdDevs[i];
            }

            // We have centred the input
            coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i];
            column++;
        }
    }

    return coefficients;
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully/*from ww w  .  ja  va2s.c om*/
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * determines the dictionary.//www  . j a va  2s.c  o m
 */
private void determineDictionary() {
    if (forcedAttributes == null) {
        // initialize stopwords
        Stopwords stopwords = new Stopwords();
        if (getUseStoplist()) {
            try {
                if (getStopwords().exists() && !getStopwords().isDirectory())
                    stopwords.read(getStopwords());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // Operate on a per-class basis if class attribute is set
        int classInd = getInputFormat().classIndex();
        int values = 1;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            values = getInputFormat().attribute(classInd).numValues();
        }

        // TreeMap dictionaryArr [] = new TreeMap[values];
        TreeMap[] dictionaryArr = new TreeMap[values];
        for (int i = 0; i < values; i++) {
            dictionaryArr[i] = new TreeMap();
        }

        // Make sure we know which fields to convert
        determineSelectedRange();

        // Tokenize all training text into an orderedMap of "words".
        long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
            Instance instance = getInputFormat().instance(i);
            int vInd = 0;
            if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
                vInd = (int) instance.classValue();
            }

            // Iterate through all relevant string attributes of the current
            // instance
            Hashtable h = new Hashtable();
            for (int j = 0; j < instance.numAttributes(); j++) {
                if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                    // Get tokenizer
                    m_Tokenizer.tokenize(instance.stringValue(j));

                    // Iterate through tokens, perform stemming, and remove
                    // stopwords
                    // (if required)
                    while (m_Tokenizer.hasMoreElements()) {
                        String word = ((String) m_Tokenizer.nextElement()).intern();

                        if (this.m_lowerCaseTokens == true)
                            word = word.toLowerCase();

                        word = m_Stemmer.stem(word);

                        if (this.m_useStoplist == true)
                            if (stopwords.is(word))
                                continue;

                        if (!(h.contains(word)))
                            h.put(word, new Integer(0));

                        Count count = (Count) dictionaryArr[vInd].get(word);
                        if (count == null) {
                            dictionaryArr[vInd].put(word, new Count(1));
                        } else {
                            count.count++;
                        }
                    }
                }
            }

            // updating the docCount for the words that have occurred in
            // this
            // instance(document).
            Enumeration e = h.keys();
            while (e.hasMoreElements()) {
                String word = (String) e.nextElement();
                Count c = (Count) dictionaryArr[vInd].get(word);
                if (c != null) {
                    c.docCount++;
                } else
                    System.err.println("Warning: A word should definitely be in the "
                            + "dictionary.Please check the code");
            }

            if (pruneRate > 0) {
                if (i % pruneRate == 0 && i > 0) {
                    for (int z = 0; z < values; z++) {
                        Vector d = new Vector(1000);
                        Iterator it = dictionaryArr[z].keySet().iterator();
                        while (it.hasNext()) {
                            String word = (String) it.next();
                            Count count = (Count) dictionaryArr[z].get(word);
                            if (count.count <= 1) {
                                d.add(word);
                            }
                        }
                        Iterator iter = d.iterator();
                        while (iter.hasNext()) {
                            String word = (String) iter.next();
                            dictionaryArr[z].remove(word);
                        }
                    }
                }
            }
        }

        // Figure out the minimum required word frequency
        int totalsize = 0;
        int prune[] = new int[values];
        for (int z = 0; z < values; z++) {
            totalsize += dictionaryArr[z].size();

            int array[] = new int[dictionaryArr[z].size()];
            int pos = 0;
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                array[pos] = count.count;
                pos++;
            }

            // sort the array
            sortArray(array);
            if (array.length < m_WordsToKeep) {
                // if there aren't enough words, set the threshold to
                // minFreq
                prune[z] = m_minTermFreq;
            } else {
                // otherwise set it to be at least minFreq
                prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
            }
        }

        // Convert the dictionary into an attribute index
        // and create one attribute per word
        FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

        // Add the non-converted attributes
        int classIndex = -1;
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (!m_SelectedRange.isInRange(i)) {
                if (getInputFormat().classIndex() == i) {
                    classIndex = attributes.size();
                }
                attributes.addElement(getInputFormat().attribute(i).copy());
            }
        }

        // Add the word vector attributes (eliminating duplicates
        // that occur in multiple classes)
        TreeMap newDictionary = new TreeMap();
        int index = attributes.size();
        for (int z = 0; z < values; z++) {
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                if (count.count >= prune[z]) {
                    if (newDictionary.get(word) == null) {
                        newDictionary.put(word, new Integer(index++));
                        attributes.addElement(new Attribute(m_Prefix + word));
                    }
                }
            }
        }

        // Compute document frequencies
        m_DocsCounts = new int[attributes.size()];
        Iterator it = newDictionary.keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            int idx = ((Integer) newDictionary.get(word)).intValue();
            int docsCount = 0;
            for (int j = 0; j < values; j++) {
                Count c = (Count) dictionaryArr[j].get(word);
                if (c != null)
                    docsCount += c.docCount;
            }
            m_DocsCounts[idx] = docsCount;
        }

        // Trim vector and set instance variables
        attributes.trimToSize();
        m_Dictionary = newDictionary;
        m_NumInstances = getInputFormat().numInstances();

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        setOutputFormat(outputFormat);
    } else {
        //m_Dictionary = newDictionary;
        determineSelectedRange();
        m_NumInstances = getInputFormat().numInstances();

        TreeMap newDictionary = new TreeMap();
        for (int i = 2; i < forcedAttributes.size(); i++) {
            newDictionary.put(((Attribute) forcedAttributes.get(i)).name(), new Integer(i));
        }
        m_Dictionary = newDictionary;

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), forcedAttributes, 0);
        outputFormat.setClassIndex(1);
        setOutputFormat(outputFormat);
    }
}

From source file:Classifier.supervised.LinearRegression.java

License:Open Source License

/**
 * Calculate a linear regression using the selected attributes
 *
 * @param selectedAttributes an array of booleans where each element
 * is true if the corresponding attribute should be included in the
 * regression.//from  w ww .jav a 2s. com
 * @return an array of coefficients for the linear regression model.
 * @throws Exception if an error occurred during the regression.
 */
protected double[] doRegression(boolean[] selectedAttributes) throws Exception {

    if (m_Debug) {
        System.out.print("doRegression(");
        for (int i = 0; i < selectedAttributes.length; i++) {
            System.out.print(" " + selectedAttributes[i]);
        }
        System.out.println(" )");
    }
    int numAttributes = 0;
    for (int i = 0; i < selectedAttributes.length; i++) {
        if (selectedAttributes[i]) {
            numAttributes++;
        }
    }

    // Check whether there are still attributes left
    Matrix independent = null, dependent = null;
    if (numAttributes > 0) {
        independent = new Matrix(m_TransformedData.numInstances(), numAttributes);
        dependent = new Matrix(m_TransformedData.numInstances(), 1);
        for (int i = 0; i < m_TransformedData.numInstances(); i++) {
            Instance inst = m_TransformedData.instance(i);
            double sqrt_weight = Math.sqrt(inst.weight());
            int column = 0;
            for (int j = 0; j < m_TransformedData.numAttributes(); j++) {
                if (j == m_ClassIndex) {
                    dependent.set(i, 0, inst.classValue() * sqrt_weight);
                } else {
                    if (selectedAttributes[j]) {
                        double value = inst.value(j) - m_Means[j];

                        // We only need to do this if we want to
                        // scale the input
                        if (!m_checksTurnedOff) {
                            value /= m_StdDevs[j];
                        }
                        independent.set(i, column, value * sqrt_weight);
                        column++;
                    }
                }
            }
        }
    }

    // Compute coefficients (note that we have to treat the
    // intercept separately so that it doesn't get affected
    // by the ridge constant.)
    double[] coefficients = new double[numAttributes + 1];
    if (numAttributes > 0) {
        double[] coeffsWithoutIntercept = independent.regression(dependent, m_Ridge).getCoefficients();
        System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes);
    }
    coefficients[numAttributes] = m_ClassMean;

    // Convert coefficients into original scale
    int column = 0;
    for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
        if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) {

            // We only need to do this if we have scaled the
            // input.
            if (!m_checksTurnedOff) {
                coefficients[column] /= m_StdDevs[i];
            }

            // We have centred the input
            coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i];
            column++;
        }
    }

    return coefficients;
}

From source file:classifiers.ComplexClassifier.java

@Override
public boolean Classify(Instance in) {
    double[][] probabilities = new double[Modelmenge.numAttributes()][Modelmenge.numClasses()];
    Enumeration<Attribute> enu = Modelmenge.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();

        for (int i = 0; i < Modelmenge.numClasses(); i++) {

            if (att.index() < list.size()) {
                probabilities[att.index()][i] = list.get(att.index()).getProbs(in);
            }/*  www .  j  a  va 2  s .com*/

        }

        attindex++;
    }

    for (int i = 0; i < Modelmenge.numClasses(); i++) {
        for (int j = 0; j < Modelmenge.numAttributes(); j++) {
            Classparam[i] *= probabilities[j][i];
        }
    }

    return (Maxindex(Classparam) == in.classValue());
}

From source file:classifiers.ComplexClassifierZufall.java

@Override
public boolean Classify(Instance in) {

    double[][] probabilities = new double[Modelmenge.numAttributes()][Modelmenge.numClasses()];
    Enumeration<Attribute> enu = Modelmenge.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();

        for (int i = 0; i < Modelmenge.numClasses(); i++) {

            if (att.index() < list.size()) {
                probabilities[att.index()][i] = list.get(att.index()).getProbs(in);
            }//from  w  w w  .  j av a2 s.  c om

        }

        attindex++;
    }

    for (int i = 0; i < Modelmenge.numClasses(); i++) {
        for (int j = 0; j < Modelmenge.numAttributes(); j++) {
            Classparam[i] *= probabilities[j][i];
        }
    }

    return (Maxindex(Classparam) == in.classValue());
}