Example usage for weka.core Instance classValue

List of usage examples for weka.core Instance classValue

Introduction

In this page you can find the example usage for weka.core Instance classValue.

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:cezeri.feature.selection.FeatureSelectionInfluence.java

private static double[] getAttributeValues(Instances test) {
    int n = test.numInstances();
    double[] ret = new double[n];
    for (int i = 0; i < n; i++) {
        Instance ins = test.instance(i);
        ret[i] = ins.classValue();
    }// w  ww . j av  a 2s.  c o  m
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

/**
 *
 * @param m tm dataset/*from  ww  w.ja  v  a2 s . c om*/
 * @param val class value deeri val olanlar filtrele
 * @return
 */
public static Instances[] getSpecificInstancesBasedOnClassValue(Instances m, String[] cl) {
    Instances[] ret = new Instances[cl.length];
    for (int i = 0; i < ret.length; i++) {
        ret[i] = FactoryInstance.generateInstances(m.relationName() + "_class=" + cl[i], m.numAttributes());
        //            ret[i] = m.resampleWithWeights(new Random());
        ret[i].delete();
    }
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        for (int j = 0; j < cl.length; j++) {
            if (("" + (int) ins.classValue()).equals(cl[j])) {
                ret[j].add(ins);
            }
        }
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static double[] getClassData(Instances m) {
    double[] ret = new double[m.numInstances()];
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        ret[i] = ins.classValue();
    }//from  w ww  .j  ava  2 s.  co  m
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static double[] getClassData(Instances m, int val) {
    Vector v = new Vector();
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        if ((int) ins.classValue() == val) {
            v.add(ins.classValue());/*from   w  w w  .  j a va 2  s.c  om*/
        }
    }
    double[] ret = FactoryUtils.toDoubleArray(v);
    return ret;
}

From source file:CGLSMethod.LinearRegression.java

License:Open Source License

/**
 * Calculate a linear regression using the selected attributes
 *
 * @param selectedAttributes an array of booleans where each element
 * is true if the corresponding attribute should be included in the
 * regression.// w w w.  ja  v a 2s  .  co  m
 * @return an array of coefficients for the linear regression model.
 * @throws Exception if an error occurred during the regression.
 */
private double[] doRegression(boolean[] selectedAttributes) throws Exception {

    if (b_Debug) {
        System.out.print("doRegression(");
        for (int i = 0; i < selectedAttributes.length; i++) {
            System.out.print(" " + selectedAttributes[i]);
        }
        System.out.println(" )");
    }
    int numAttributes = 0;
    for (int i = 0; i < selectedAttributes.length; i++) {
        if (selectedAttributes[i]) {
            numAttributes++;
        }
    }

    // Check whether there are still attributes left
    Matrix independent = null, dependent = null;
    double[] weights = null;
    if (numAttributes > 0) {
        independent = new Matrix(m_TransformedData.numInstances(), numAttributes);
        dependent = new Matrix(m_TransformedData.numInstances(), 1);
        for (int i = 0; i < m_TransformedData.numInstances(); i++) {
            Instance inst = m_TransformedData.instance(i);
            int column = 0;
            for (int j = 0; j < m_TransformedData.numAttributes(); j++) {
                if (j == m_ClassIndex) {
                    dependent.setElement(i, 0, inst.classValue());
                } else {
                    if (selectedAttributes[j]) {
                        double value = inst.value(j) - m_Means[j];

                        // We only need to do this if we want to
                        // scale the input
                        if (!m_checksTurnedOff) {
                            value /= m_StdDevs[j];
                        }
                        independent.setElement(i, column, value);
                        column++;
                    }
                }
            }
        }

        // Grab instance weights
        weights = new double[m_TransformedData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = m_TransformedData.instance(i).weight();
        }
    }

    // Compute coefficients (note that we have to treat the
    // intercept separately so that it doesn't get affected
    // by the ridge constant.)
    double[] coefficients = new double[numAttributes + 1];
    if (numAttributes > 0) {
        double[] coeffsWithoutIntercept = independent.regression(dependent, weights, m_Ridge);
        System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes);
    }
    coefficients[numAttributes] = m_ClassMean;

    // Convert coefficients into original scale
    int column = 0;
    for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
        if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) {

            // We only need to do this if we have scaled the
            // input.
            if (!m_checksTurnedOff) {
                coefficients[column] /= m_StdDevs[i];
            }

            // We have centred the input
            coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i];
            column++;
        }
    }

    return coefficients;
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully/*from ww w  .  ja  va2s.c om*/
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * determines the dictionary.//www  . j a va  2s.c  o m
 */
private void determineDictionary() {
    if (forcedAttributes == null) {
        // initialize stopwords
        Stopwords stopwords = new Stopwords();
        if (getUseStoplist()) {
            try {
                if (getStopwords().exists() && !getStopwords().isDirectory())
                    stopwords.read(getStopwords());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // Operate on a per-class basis if class attribute is set
        int classInd = getInputFormat().classIndex();
        int values = 1;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            values = getInputFormat().attribute(classInd).numValues();
        }

        // TreeMap dictionaryArr [] = new TreeMap[values];
        TreeMap[] dictionaryArr = new TreeMap[values];
        for (int i = 0; i < values; i++) {
            dictionaryArr[i] = new TreeMap();
        }

        // Make sure we know which fields to convert
        determineSelectedRange();

        // Tokenize all training text into an orderedMap of "words".
        long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
            Instance instance = getInputFormat().instance(i);
            int vInd = 0;
            if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
                vInd = (int) instance.classValue();
            }

            // Iterate through all relevant string attributes of the current
            // instance
            Hashtable h = new Hashtable();
            for (int j = 0; j < instance.numAttributes(); j++) {
                if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                    // Get tokenizer
                    m_Tokenizer.tokenize(instance.stringValue(j));

                    // Iterate through tokens, perform stemming, and remove
                    // stopwords
                    // (if required)
                    while (m_Tokenizer.hasMoreElements()) {
                        String word = ((String) m_Tokenizer.nextElement()).intern();

                        if (this.m_lowerCaseTokens == true)
                            word = word.toLowerCase();

                        word = m_Stemmer.stem(word);

                        if (this.m_useStoplist == true)
                            if (stopwords.is(word))
                                continue;

                        if (!(h.contains(word)))
                            h.put(word, new Integer(0));

                        Count count = (Count) dictionaryArr[vInd].get(word);
                        if (count == null) {
                            dictionaryArr[vInd].put(word, new Count(1));
                        } else {
                            count.count++;
                        }
                    }
                }
            }

            // updating the docCount for the words that have occurred in
            // this
            // instance(document).
            Enumeration e = h.keys();
            while (e.hasMoreElements()) {
                String word = (String) e.nextElement();
                Count c = (Count) dictionaryArr[vInd].get(word);
                if (c != null) {
                    c.docCount++;
                } else
                    System.err.println("Warning: A word should definitely be in the "
                            + "dictionary.Please check the code");
            }

            if (pruneRate > 0) {
                if (i % pruneRate == 0 && i > 0) {
                    for (int z = 0; z < values; z++) {
                        Vector d = new Vector(1000);
                        Iterator it = dictionaryArr[z].keySet().iterator();
                        while (it.hasNext()) {
                            String word = (String) it.next();
                            Count count = (Count) dictionaryArr[z].get(word);
                            if (count.count <= 1) {
                                d.add(word);
                            }
                        }
                        Iterator iter = d.iterator();
                        while (iter.hasNext()) {
                            String word = (String) iter.next();
                            dictionaryArr[z].remove(word);
                        }
                    }
                }
            }
        }

        // Figure out the minimum required word frequency
        int totalsize = 0;
        int prune[] = new int[values];
        for (int z = 0; z < values; z++) {
            totalsize += dictionaryArr[z].size();

            int array[] = new int[dictionaryArr[z].size()];
            int pos = 0;
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                array[pos] = count.count;
                pos++;
            }

            // sort the array
            sortArray(array);
            if (array.length < m_WordsToKeep) {
                // if there aren't enough words, set the threshold to
                // minFreq
                prune[z] = m_minTermFreq;
            } else {
                // otherwise set it to be at least minFreq
                prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
            }
        }

        // Convert the dictionary into an attribute index
        // and create one attribute per word
        FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

        // Add the non-converted attributes
        int classIndex = -1;
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (!m_SelectedRange.isInRange(i)) {
                if (getInputFormat().classIndex() == i) {
                    classIndex = attributes.size();
                }
                attributes.addElement(getInputFormat().attribute(i).copy());
            }
        }

        // Add the word vector attributes (eliminating duplicates
        // that occur in multiple classes)
        TreeMap newDictionary = new TreeMap();
        int index = attributes.size();
        for (int z = 0; z < values; z++) {
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                if (count.count >= prune[z]) {
                    if (newDictionary.get(word) == null) {
                        newDictionary.put(word, new Integer(index++));
                        attributes.addElement(new Attribute(m_Prefix + word));
                    }
                }
            }
        }

        // Compute document frequencies
        m_DocsCounts = new int[attributes.size()];
        Iterator it = newDictionary.keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            int idx = ((Integer) newDictionary.get(word)).intValue();
            int docsCount = 0;
            for (int j = 0; j < values; j++) {
                Count c = (Count) dictionaryArr[j].get(word);
                if (c != null)
                    docsCount += c.docCount;
            }
            m_DocsCounts[idx] = docsCount;
        }

        // Trim vector and set instance variables
        attributes.trimToSize();
        m_Dictionary = newDictionary;
        m_NumInstances = getInputFormat().numInstances();

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        setOutputFormat(outputFormat);
    } else {
        //m_Dictionary = newDictionary;
        determineSelectedRange();
        m_NumInstances = getInputFormat().numInstances();

        TreeMap newDictionary = new TreeMap();
        for (int i = 2; i < forcedAttributes.size(); i++) {
            newDictionary.put(((Attribute) forcedAttributes.get(i)).name(), new Integer(i));
        }
        m_Dictionary = newDictionary;

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), forcedAttributes, 0);
        outputFormat.setClassIndex(1);
        setOutputFormat(outputFormat);
    }
}

From source file:Classifier.supervised.LinearRegression.java

License:Open Source License

/**
 * Calculate a linear regression using the selected attributes
 *
 * @param selectedAttributes an array of booleans where each element
 * is true if the corresponding attribute should be included in the
 * regression.//from  w ww .jav a 2s. com
 * @return an array of coefficients for the linear regression model.
 * @throws Exception if an error occurred during the regression.
 */
protected double[] doRegression(boolean[] selectedAttributes) throws Exception {

    if (m_Debug) {
        System.out.print("doRegression(");
        for (int i = 0; i < selectedAttributes.length; i++) {
            System.out.print(" " + selectedAttributes[i]);
        }
        System.out.println(" )");
    }
    int numAttributes = 0;
    for (int i = 0; i < selectedAttributes.length; i++) {
        if (selectedAttributes[i]) {
            numAttributes++;
        }
    }

    // Check whether there are still attributes left
    Matrix independent = null, dependent = null;
    if (numAttributes > 0) {
        independent = new Matrix(m_TransformedData.numInstances(), numAttributes);
        dependent = new Matrix(m_TransformedData.numInstances(), 1);
        for (int i = 0; i < m_TransformedData.numInstances(); i++) {
            Instance inst = m_TransformedData.instance(i);
            double sqrt_weight = Math.sqrt(inst.weight());
            int column = 0;
            for (int j = 0; j < m_TransformedData.numAttributes(); j++) {
                if (j == m_ClassIndex) {
                    dependent.set(i, 0, inst.classValue() * sqrt_weight);
                } else {
                    if (selectedAttributes[j]) {
                        double value = inst.value(j) - m_Means[j];

                        // We only need to do this if we want to
                        // scale the input
                        if (!m_checksTurnedOff) {
                            value /= m_StdDevs[j];
                        }
                        independent.set(i, column, value * sqrt_weight);
                        column++;
                    }
                }
            }
        }
    }

    // Compute coefficients (note that we have to treat the
    // intercept separately so that it doesn't get affected
    // by the ridge constant.)
    double[] coefficients = new double[numAttributes + 1];
    if (numAttributes > 0) {
        double[] coeffsWithoutIntercept = independent.regression(dependent, m_Ridge).getCoefficients();
        System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes);
    }
    coefficients[numAttributes] = m_ClassMean;

    // Convert coefficients into original scale
    int column = 0;
    for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
        if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) {

            // We only need to do this if we have scaled the
            // input.
            if (!m_checksTurnedOff) {
                coefficients[column] /= m_StdDevs[i];
            }

            // We have centred the input
            coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i];
            column++;
        }
    }

    return coefficients;
}

From source file:classifiers.ComplexClassifier.java

@Override
public boolean Classify(Instance in) {
    double[][] probabilities = new double[Modelmenge.numAttributes()][Modelmenge.numClasses()];
    Enumeration<Attribute> enu = Modelmenge.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();

        for (int i = 0; i < Modelmenge.numClasses(); i++) {

            if (att.index() < list.size()) {
                probabilities[att.index()][i] = list.get(att.index()).getProbs(in);
            }/*  www .  j  a  va 2  s .com*/

        }

        attindex++;
    }

    for (int i = 0; i < Modelmenge.numClasses(); i++) {
        for (int j = 0; j < Modelmenge.numAttributes(); j++) {
            Classparam[i] *= probabilities[j][i];
        }
    }

    return (Maxindex(Classparam) == in.classValue());
}

From source file:classifiers.ComplexClassifierZufall.java

@Override
public boolean Classify(Instance in) {

    double[][] probabilities = new double[Modelmenge.numAttributes()][Modelmenge.numClasses()];
    Enumeration<Attribute> enu = Modelmenge.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();

        for (int i = 0; i < Modelmenge.numClasses(); i++) {

            if (att.index() < list.size()) {
                probabilities[att.index()][i] = list.get(att.index()).getProbs(in);
            }//from  w  w w  .  j av a2 s.  c om

        }

        attindex++;
    }

    for (int i = 0; i < Modelmenge.numClasses(); i++) {
        for (int j = 0; j < Modelmenge.numAttributes(); j++) {
            Classparam[i] *= probabilities[j][i];
        }
    }

    return (Maxindex(Classparam) == in.classValue());
}