Example usage for weka.core Utils smOrEq

List of usage examples for weka.core Utils smOrEq

Introduction

On this page you can find example usages of weka.core.Utils.smOrEq.

Prototype

public static boolean smOrEq(double a, double b)

Source Link

Document

Tests if a is smaller than or equal to b.

Usage

From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License:Open Source License

/**
 * Computes the chi-squared term contributed by a single cell.
 *
 * @param freq the observed value of the cell
 * @param expected the expected value of the cell
 * @param yates whether 0.5 is subtracted from the absolute difference
 *              (Yates' correction) before squaring
 * @return 0 if {@code Utils.smOrEq(expected, 0)} holds, otherwise the squared
 *         (optionally corrected) difference divided by {@code expected}
 */
private static double chiCell(double freq, double expected, boolean yates) {

    // A cell without any expected mass contributes nothing.
    if (Utils.smOrEq(expected, 0)) {
        return 0;
    }

    final double absoluteGap = Math.abs(freq - expected);
    final double gap;
    if (yates) {
        // Yates' correction; the corrected difference is clamped at zero.
        final double corrected = absoluteGap - 0.5;
        gap = (corrected < 0) ? 0 : corrected;
    } else {
        gap = absoluteGap;
    }

    return gap * gap / expected;
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Returns the correlation coefficient if the class is numeric.
 *
 * @return the correlation coefficient/*ww  w  . j av a2 s . c  om*/
 * @exception Exception if class is not numeric
 */
public final double correlationCoefficient() throws Exception {

    if (m_ClassIsNominal) {
        throw new Exception("Can't compute correlation coefficient: " + "class is nominal!");
    }

    double correlation = 0;
    double varActual = m_SumSqrClass - m_SumClass * m_SumClass / m_WithClass;
    double varPredicted = m_SumSqrPredicted - m_SumPredicted * m_SumPredicted / m_WithClass;
    double varProd = m_SumClassPredicted - m_SumClass * m_SumPredicted / m_WithClass;

    if (Utils.smOrEq(varActual * varPredicted, 0.0)) {
        correlation = 0.0;
    } else {
        correlation = varProd / Math.sqrt(varActual * varPredicted);
    }

    return correlation;
}

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Creates split on numeric attribute./*from www .j  a va2 s.  com*/
 *
 * @exception Exception if something goes wrong
 */
private void handleNumericAttribute(Instances trainInstances) throws Exception {

    int firstMiss;
    int next = 1;
    int last = 0;
    int index = 0;
    int splitIndex = -1;
    double currentInfoGain;
    double defaultEnt;
    double minSplit;
    Instance instance;
    int i;

    // Current attribute is a numeric attribute.
    m_distribution = new Distribution(2, trainInstances.numClasses());

    // Only Instances with known values are relevant.
    Enumeration enu = trainInstances.enumerateInstances();
    i = 0;
    while (enu.hasMoreElements()) {
        instance = (Instance) enu.nextElement();
        if (instance.isMissing(m_attIndex))
            break;
        m_distribution.add(1, instance);
        i++;
    }
    firstMiss = i;

    // Compute minimum number of Instances required in each
    // subset.
    minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses());
    if (Utils.smOrEq(minSplit, m_minNoObj))
        minSplit = m_minNoObj;
    else if (Utils.gr(minSplit, 25))
        minSplit = 25;

    // Enough Instances with known values?
    if (Utils.sm((double) firstMiss, 2 * minSplit))
        return;

    // Compute values of criteria for all possible split
    // indices.
    defaultEnt = m_infoGainCrit.oldEnt(m_distribution);
    while (next < firstMiss) {

        if (trainInstances.instance(next - 1).value(m_attIndex) + 1e-5 < trainInstances.instance(next)
                .value(m_attIndex)) {

            // Move class values for all Instances up to next 
            // possible split point.
            m_distribution.shiftRange(1, 0, trainInstances, last, next);

            // Check if enough Instances in each subset and compute
            // values for criteria.
            if (Utils.grOrEq(m_distribution.perBag(0), minSplit)
                    && Utils.grOrEq(m_distribution.perBag(1), minSplit)) {
                currentInfoGain = m_infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt);
                if (Utils.gr(currentInfoGain, m_infoGain)) {
                    m_infoGain = currentInfoGain;
                    splitIndex = next - 1;
                }
                index++;
            }
            last = next;
        }
        next++;
    }

    // Was there any useful split?
    if (index == 0)
        return;

    // Compute modified information gain for best split.
    m_infoGain = m_infoGain - (Utils.log2(index) / m_sumOfWeights);
    if (Utils.smOrEq(m_infoGain, 0))
        return;

    // Set instance variables' values to values for
    // best split.
    m_numSubsets = 2;
    m_splitPoint = (trainInstances.instance(splitIndex + 1).value(m_attIndex)
            + trainInstances.instance(splitIndex).value(m_attIndex)) / 2;

    // In case we have a numerical precision problem we need to choose the
    // smaller value
    if (m_splitPoint == trainInstances.instance(splitIndex + 1).value(m_attIndex)) {
        m_splitPoint = trainInstances.instance(splitIndex).value(m_attIndex);
    }

    // Restore distributioN for best split.
    m_distribution = new Distribution(2, trainInstances.numClasses());
    m_distribution.addRange(0, trainInstances, 0, splitIndex + 1);
    m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss);

    // Compute modified gain ratio for best split.
    m_gainRatio = m_gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
}

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Replaces the split point with the greatest attribute value in the given
 * data that is smaller than or equal to the old split point.
 * (C4.5 does this for some strange reason).
 *
 * <p>Nothing happens when the attribute is nominal or when there is at most
 * one subset. If no non-missing value qualifies, the split point ends up as
 * {@code -Double.MAX_VALUE}.
 *
 * @param allInstances the instances whose attribute values are considered
 */
public final void setSplitPoint(Instances allInstances) {

    if (allInstances.attribute(m_attIndex).isNominal() || !(m_numSubsets > 1)) {
        return;
    }

    double best = -Double.MAX_VALUE;
    for (Enumeration cursor = allInstances.enumerateInstances(); cursor.hasMoreElements();) {
        Instance current = (Instance) cursor.nextElement();
        if (current.isMissing(m_attIndex)) {
            continue;
        }
        double candidate = current.value(m_attIndex);
        if (Utils.gr(candidate, best) && Utils.smOrEq(candidate, m_splitPoint)) {
            best = candidate;
        }
    }
    m_splitPoint = best;
}

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Returns index of subset instance is assigned to.
 * Returns -1 if the instance's value for the split attribute is missing.
 *
 * <p>For a nominal attribute the instance goes to subset 0 when its value,
 * truncated to an int, equals the truncated split point; for any other
 * attribute it goes to subset 0 when its value is smaller than or equal to
 * the split point. Everything else goes to subset 1.
 *
 * @param instance the instance to assign
 * @return 0, 1, or -1 as described above
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {

    if (instance.isMissing(m_attIndex)) {
        return -1;
    }

    final boolean inFirstSubset;
    if (instance.attribute(m_attIndex).isNominal()) {
        inFirstSubset = (int) m_splitPoint == (int) instance.value(m_attIndex);
    } else {
        inFirstSubset = Utils.smOrEq(instance.value(m_attIndex), m_splitPoint);
    }
    return inFirstSubset ? 0 : 1;
}

From source file:j48.C45PruneableClassifierTree.java

License:Open Source License

/**
 * Prunes a tree using C4.5's pruning procedure.
 *
 * <p>All subtrees are pruned first. The node is then either turned into a
 * leaf, replaced by its largest branch (followed by another pruning pass),
 * or left unchanged, depending on how the estimated errors of those three
 * alternatives compare; each comparison allows a slack of 0.1.
 *
 * @throws Exception if something goes wrong
 */
public void prune() throws Exception {

    // Leaves cannot be pruned any further.
    if (m_isLeaf) {
        return;
    }

    // Bottom-up: handle the children before this node.
    for (int child = 0; child < m_sons.length; child++) {
        son(child).prune();
    }

    // Estimated error of the largest branch; only relevant with subtree
    // raising, otherwise it can never win a comparison.
    final int biggestBag = localModel().distribution().maxBag();
    final double branchErrors = m_subtreeRaising
            ? son(biggestBag).getEstimatedErrorsForBranch((Instances) m_train)
            : Double.MAX_VALUE;

    // Estimated error if this node were a leaf, and for the subtree as is.
    final double leafErrors = getEstimatedErrorsForDistribution(localModel().distribution());
    final double treeErrors = getEstimatedErrors();
    final double treeBound = treeErrors + 0.1;

    // Collapse to a leaf when that is no worse than both alternatives.
    if (Utils.smOrEq(leafErrors, treeBound) && Utils.smOrEq(leafErrors, branchErrors + 0.1)) {
        m_sons = null;
        m_isLeaf = true;
        m_localModel = new NoSplit(localModel().distribution());
        return;
    }

    // Keep the subtree unless the largest branch is no worse than it.
    if (!Utils.smOrEq(branchErrors, treeBound)) {
        return;
    }

    // Raise the largest branch into this node and prune the result again.
    final C45PruneableClassifierTree raised = son(biggestBag);
    m_sons = raised.m_sons;
    m_localModel = raised.localModel();
    m_isLeaf = raised.m_isLeaf;
    newDistribution(m_train);
    prune();
}

From source file:j48.C45PruneableClassifierTreeG.java

License:Open Source License

/**
 * Prunes a tree using C4.5's pruning procedure.
 *
 * <p>All subtrees are pruned first. The node is then either turned into a
 * leaf, replaced by its largest branch (followed by another pruning pass),
 * or left unchanged, depending on how the estimated errors of those three
 * alternatives compare; each comparison allows a slack of 0.1.
 *
 * @throws Exception if something goes wrong
 */
public void prune() throws Exception {

    // Leaves cannot be pruned any further.
    if (m_isLeaf) {
        return;
    }

    // Bottom-up: handle the children before this node.
    for (int child = 0; child < m_sons.length; child++) {
        son(child).prune();
    }

    // Estimated error of the largest branch; only relevant with subtree
    // raising, otherwise it can never win a comparison.
    final int biggestBag = localModel().distribution().maxBag();
    final double branchErrors = m_subtreeRaising
            ? son(biggestBag).getEstimatedErrorsForBranch((Instances) m_train)
            : Double.MAX_VALUE;

    // Estimated error if this node were a leaf, and for the subtree as is.
    final double leafErrors = getEstimatedErrorsForDistribution(localModel().distribution());
    final double treeErrors = getEstimatedErrors();
    final double treeBound = treeErrors + 0.1;

    // Collapse to a leaf when that is no worse than both alternatives.
    if (Utils.smOrEq(leafErrors, treeBound) && Utils.smOrEq(leafErrors, branchErrors + 0.1)) {
        m_sons = null;
        m_isLeaf = true;
        m_localModel = new NoSplit(localModel().distribution());
        return;
    }

    // Keep the subtree unless the largest branch is no worse than it.
    if (!Utils.smOrEq(branchErrors, treeBound)) {
        return;
    }

    // Raise the largest branch into this node and prune the result again.
    final C45PruneableClassifierTreeG raised = son(biggestBag);
    m_sons = raised.m_sons;
    m_localModel = raised.localModel();
    m_isLeaf = raised.m_isLeaf;
    newDistribution(m_train);
    prune();
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Creates split on numeric attribute.
 *
 * <p>Evaluates the candidate split positions among the instances that precede
 * the first instance with a missing value for the attribute, remembers the
 * one with the highest information gain and, if the penalised gain is still
 * positive, stores the split point, the two-bag distribution and the gain
 * ratio in the corresponding fields. Returns early without selecting a split
 * when there are too few known-valued instances, when {@code m_index} is 0
 * after the scan, or when the penalised gain is not positive.
 *
 * @param trainInstances the training instances; NOTE(review): the scan stops
 *                at the first missing value and compares neighbouring values,
 *                so the data is presumably sorted on the attribute with
 *                missing values last - confirm against the caller
 * @exception Exception
 *                if something goes wrong
 */
private void handleNumericAttribute(Instances trainInstances) throws Exception {

    int firstMiss;
    int next = 1;
    int last = 0;
    int splitIndex = -1;
    double currentInfoGain;
    double defaultEnt;
    double minSplit;
    Instance instance;
    int i;

    // Current attribute is a numeric attribute.
    m_distribution = new Distribution(2, trainInstances.numClasses());

    // Only Instances with known values are relevant: everything up to the
    // first missing value is added to bag 1, and i counts those instances.
    Enumeration enu = trainInstances.enumerateInstances();
    i = 0;
    while (enu.hasMoreElements()) {
        instance = (Instance) enu.nextElement();
        if (instance.isMissing(m_attIndex))
            break;
        m_distribution.add(1, instance);
        i++;
    }
    firstMiss = i;

    // Compute minimum number of Instances required in each
    // subset: a tenth of the distribution total per class, but never
    // below m_minNoObj and never above 25.
    minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses());
    if (Utils.smOrEq(minSplit, m_minNoObj))
        minSplit = m_minNoObj;
    else if (Utils.gr(minSplit, 25))
        minSplit = 25;

    // Enough Instances with known values?
    if (Utils.sm((double) firstMiss, 2 * minSplit))
        return;

    // Compute values of criteria for all possible split
    // indices.
    defaultEnt = infoGainCrit.oldEnt(m_distribution);
    while (next < firstMiss) {

        // A split is only considered between two neighbouring values that
        // differ by more than 1e-5.
        if (trainInstances.instance(next - 1).value(m_attIndex) + 1e-5 < trainInstances.instance(next)
                .value(m_attIndex)) {

            // Move class values for all Instances up to next
            // possible split point.
            m_distribution.shiftRange(1, 0, trainInstances, last, next);

            // Check if enough Instances in each subset and compute
            // values for criteria.
            // NOTE(review): rrrrr is not declared in this method - presumably
            // a field of the enclosing class; confirm its meaning.
            if (Utils.grOrEq(m_distribution.perBag(0), minSplit)
                    && Utils.grOrEq(m_distribution.perBag(1), minSplit)) {
                currentInfoGain = infoGainCrit.splitCritValue1(m_distribution, m_sumOfWeights, defaultEnt,
                        rrrrr);
                if (Utils.gr(currentInfoGain, m_infoGain)) {
                    m_infoGain = currentInfoGain;
                    splitIndex = next - 1;
                }
                // m_index is incremented for every candidate that passed the
                // minimum-size check; it is not reset in this method.
                m_index++;
            }
            last = next;
        }
        next++;
    }

    // Was there any useful split?
    if (m_index == 0)
        return;

    // Compute modified information gain for best split: the gain is
    // reduced by log2(m_index) / m_sumOfWeights.
    m_infoGain = m_infoGain - (Utils.log2(m_index) / m_sumOfWeights);
    if (Utils.smOrEq(m_infoGain, 0))
        return;

    // Set instance variables' values to values for
    // best split: the split point is the midpoint of the two values
    // around the chosen index.
    m_numSubsets = 2;
    m_splitPoint = (trainInstances.instance(splitIndex + 1).value(m_attIndex)
            + trainInstances.instance(splitIndex).value(m_attIndex)) / 2;

    // In case we have a numerical precision problem we need to choose the
    // smaller value
    if (m_splitPoint == trainInstances.instance(splitIndex + 1).value(m_attIndex)) {
        m_splitPoint = trainInstances.instance(splitIndex).value(m_attIndex);
    }

    // Restore distribution for best split: bag 0 gets the instances up to
    // and including splitIndex, bag 1 the rest before the first missing value.
    m_distribution = new Distribution(2, trainInstances.numClasses());
    m_distribution.addRange(0, trainInstances, 0, splitIndex + 1);
    m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss);

    // Compute modified gain ratio for best split.
    // NOTE(review): lllll is not declared in this method - presumably a
    // field of the enclosing class; confirm its meaning.
    m_gainRatio = gainRatioCrit.splitCritValue1(m_distribution, m_sumOfWeights, m_infoGain, lllll);
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Replaces the split point with the greatest attribute value in the given
 * data that is smaller than or equal to the old split point. (C4.5 does this
 * for some strange reason).
 *
 * <p>Nothing happens unless the attribute is numeric and there is more than
 * one subset. If no non-missing value qualifies, the split point ends up as
 * {@code -Double.MAX_VALUE}.
 *
 * @param allInstances
 *                the instances whose attribute values are considered
 */
public final void setSplitPoint(Instances allInstances) {

    final boolean applicable = allInstances.attribute(m_attIndex).isNumeric() && (m_numSubsets > 1);
    if (!applicable) {
        return;
    }

    double best = -Double.MAX_VALUE;
    final Enumeration cursor = allInstances.enumerateInstances();
    while (cursor.hasMoreElements()) {
        final Instance current = (Instance) cursor.nextElement();
        if (current.isMissing(m_attIndex)) {
            continue;
        }
        final double candidate = current.value(m_attIndex);
        if (Utils.gr(candidate, best) && Utils.smOrEq(candidate, m_splitPoint)) {
            best = candidate;
        }
    }
    m_splitPoint = best;
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Returns index of subset instance is assigned to. Returns -1 if the
 * instance's value for the split attribute is missing.
 *
 * <p>For a nominal attribute the index is the attribute value truncated to an
 * int; otherwise it is 0 when the value is smaller than or equal to the split
 * point and 1 when it is not.
 *
 * @param instance
 *                the instance to assign
 * @return the subset index, or -1 for a missing value
 * @exception Exception
 *                if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {

    if (instance.isMissing(m_attIndex)) {
        return -1;
    }
    if (instance.attribute(m_attIndex).isNominal()) {
        return (int) instance.value(m_attIndex);
    }
    return Utils.smOrEq(instance.value(m_attIndex), m_splitPoint) ? 0 : 1;
}