List of usage examples for weka.core.Utils.smOrEq
public static boolean smOrEq(double a, double b)
From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java
License:Open Source License
private static double chiCell(double freq, double expected, boolean yates) { // Cell in empty row and column? if (Utils.smOrEq(expected, 0)) { return 0; }// w w w . j av a 2 s . c o m // Compute difference between observed and expected value double diff = Math.abs(freq - expected); if (yates) { // Apply Yates' correction if wanted diff -= 0.5; // The difference should never be negative if (diff < 0) { diff = 0; } } // Return chi-value for the cell return (diff * diff / expected); }
From source file:GClass.EvaluationInternal.java
License:Open Source License
/** * Returns the correlation coefficient if the class is numeric. * * @return the correlation coefficient/*ww w . j av a2 s . c om*/ * @exception Exception if class is not numeric */ public final double correlationCoefficient() throws Exception { if (m_ClassIsNominal) { throw new Exception("Can't compute correlation coefficient: " + "class is nominal!"); } double correlation = 0; double varActual = m_SumSqrClass - m_SumClass * m_SumClass / m_WithClass; double varPredicted = m_SumSqrPredicted - m_SumPredicted * m_SumPredicted / m_WithClass; double varProd = m_SumClassPredicted - m_SumClass * m_SumPredicted / m_WithClass; if (Utils.smOrEq(varActual * varPredicted, 0.0)) { correlation = 0.0; } else { correlation = varProd / Math.sqrt(varActual * varPredicted); } return correlation; }
From source file:j48.BinC45Split.java
License:Open Source License
/** * Creates split on numeric attribute./*from www .j a va2 s. com*/ * * @exception Exception if something goes wrong */ private void handleNumericAttribute(Instances trainInstances) throws Exception { int firstMiss; int next = 1; int last = 0; int index = 0; int splitIndex = -1; double currentInfoGain; double defaultEnt; double minSplit; Instance instance; int i; // Current attribute is a numeric attribute. m_distribution = new Distribution(2, trainInstances.numClasses()); // Only Instances with known values are relevant. Enumeration enu = trainInstances.enumerateInstances(); i = 0; while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (instance.isMissing(m_attIndex)) break; m_distribution.add(1, instance); i++; } firstMiss = i; // Compute minimum number of Instances required in each // subset. minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses()); if (Utils.smOrEq(minSplit, m_minNoObj)) minSplit = m_minNoObj; else if (Utils.gr(minSplit, 25)) minSplit = 25; // Enough Instances with known values? if (Utils.sm((double) firstMiss, 2 * minSplit)) return; // Compute values of criteria for all possible split // indices. defaultEnt = m_infoGainCrit.oldEnt(m_distribution); while (next < firstMiss) { if (trainInstances.instance(next - 1).value(m_attIndex) + 1e-5 < trainInstances.instance(next) .value(m_attIndex)) { // Move class values for all Instances up to next // possible split point. m_distribution.shiftRange(1, 0, trainInstances, last, next); // Check if enough Instances in each subset and compute // values for criteria. if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit)) { currentInfoGain = m_infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt); if (Utils.gr(currentInfoGain, m_infoGain)) { m_infoGain = currentInfoGain; splitIndex = next - 1; } index++; } last = next; } next++; } // Was there any useful split? 
if (index == 0) return; // Compute modified information gain for best split. m_infoGain = m_infoGain - (Utils.log2(index) / m_sumOfWeights); if (Utils.smOrEq(m_infoGain, 0)) return; // Set instance variables' values to values for // best split. m_numSubsets = 2; m_splitPoint = (trainInstances.instance(splitIndex + 1).value(m_attIndex) + trainInstances.instance(splitIndex).value(m_attIndex)) / 2; // In case we have a numerical precision problem we need to choose the // smaller value if (m_splitPoint == trainInstances.instance(splitIndex + 1).value(m_attIndex)) { m_splitPoint = trainInstances.instance(splitIndex).value(m_attIndex); } // Restore distributioN for best split. m_distribution = new Distribution(2, trainInstances.numClasses()); m_distribution.addRange(0, trainInstances, 0, splitIndex + 1); m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss); // Compute modified gain ratio for best split. m_gainRatio = m_gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain); }
From source file:j48.BinC45Split.java
License:Open Source License
/**
 * Sets split point to greatest value in given data smaller or equal to
 * old split point.
 * (C4.5 does this for some strange reason).
 *
 * <p>Nothing happens when the attribute is nominal or when there is at
 * most one subset.
 *
 * @param allInstances the data whose attribute values are searched
 */
public final void setSplitPoint(Instances allInstances) {
  if (allInstances.attribute(m_attIndex).isNominal() || m_numSubsets <= 1) {
    return;
  }

  double best = -Double.MAX_VALUE;
  Enumeration enu = allInstances.enumerateInstances();
  while (enu.hasMoreElements()) {
    Instance current = (Instance) enu.nextElement();
    if (current.isMissing(m_attIndex)) {
      continue;
    }
    double candidate = current.value(m_attIndex);
    // Keep the largest value seen so far that does not exceed the old
    // split point.
    if (Utils.gr(candidate, best) && Utils.smOrEq(candidate, m_splitPoint)) {
      best = candidate;
    }
  }
  m_splitPoint = best;
}
From source file:j48.BinC45Split.java
License:Open Source License
/**
 * Returns index of subset instance is assigned to.
 * Returns -1 if instance is assigned to more than one subset.
 *
 * @param instance the instance to assign
 * @return -1 if the attribute value is missing; otherwise 0 or 1
 * @exception Exception if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {
  if (instance.isMissing(m_attIndex)) {
    return -1;
  }

  if (instance.attribute(m_attIndex).isNominal()) {
    // Nominal case: subset 0 holds the value whose index equals the
    // split point, subset 1 everything else.
    return ((int) m_splitPoint == (int) instance.value(m_attIndex)) ? 0 : 1;
  }

  // Numeric case: values up to the split point go to subset 0.
  return Utils.smOrEq(instance.value(m_attIndex), m_splitPoint) ? 0 : 1;
}
From source file:j48.C45PruneableClassifierTree.java
License:Open Source License
/** * Prunes a tree using C4.5's pruning procedure. * * @throws Exception if something goes wrong *//* w ww .j a va 2s . c om*/ public void prune() throws Exception { double errorsLargestBranch; double errorsLeaf; double errorsTree; int indexOfLargestBranch; C45PruneableClassifierTree largestBranch; int i; if (!m_isLeaf) { // Prune all subtrees. for (i = 0; i < m_sons.length; i++) son(i).prune(); // Compute error for largest branch indexOfLargestBranch = localModel().distribution().maxBag(); if (m_subtreeRaising) { errorsLargestBranch = son(indexOfLargestBranch).getEstimatedErrorsForBranch((Instances) m_train); } else { errorsLargestBranch = Double.MAX_VALUE; } // Compute error if this Tree would be leaf errorsLeaf = getEstimatedErrorsForDistribution(localModel().distribution()); // Compute error for the whole subtree errorsTree = getEstimatedErrors(); // Decide if leaf is best choice. if (Utils.smOrEq(errorsLeaf, errorsTree + 0.1) && Utils.smOrEq(errorsLeaf, errorsLargestBranch + 0.1)) { // Free son Trees m_sons = null; m_isLeaf = true; // Get NoSplit Model for node. m_localModel = new NoSplit(localModel().distribution()); return; } // Decide if largest branch is better choice // than whole subtree. if (Utils.smOrEq(errorsLargestBranch, errorsTree + 0.1)) { largestBranch = son(indexOfLargestBranch); m_sons = largestBranch.m_sons; m_localModel = largestBranch.localModel(); m_isLeaf = largestBranch.m_isLeaf; newDistribution(m_train); prune(); } } }
From source file:j48.C45PruneableClassifierTreeG.java
License:Open Source License
/** * Prunes a tree using C4.5's pruning procedure. * * @throws Exception if something goes wrong *///from w ww . j av a 2s .com public void prune() throws Exception { double errorsLargestBranch; double errorsLeaf; double errorsTree; int indexOfLargestBranch; C45PruneableClassifierTreeG largestBranch; int i; if (!m_isLeaf) { // Prune all subtrees. for (i = 0; i < m_sons.length; i++) son(i).prune(); // Compute error for largest branch indexOfLargestBranch = localModel().distribution().maxBag(); if (m_subtreeRaising) { errorsLargestBranch = son(indexOfLargestBranch).getEstimatedErrorsForBranch((Instances) m_train); } else { errorsLargestBranch = Double.MAX_VALUE; } // Compute error if this Tree would be leaf errorsLeaf = getEstimatedErrorsForDistribution(localModel().distribution()); // Compute error for the whole subtree errorsTree = getEstimatedErrors(); // Decide if leaf is best choice. if (Utils.smOrEq(errorsLeaf, errorsTree + 0.1) && Utils.smOrEq(errorsLeaf, errorsLargestBranch + 0.1)) { // Free son Trees m_sons = null; m_isLeaf = true; // Get NoSplit Model for node. m_localModel = new NoSplit(localModel().distribution()); return; } // Decide if largest branch is better choice // than whole subtree. if (Utils.smOrEq(errorsLargestBranch, errorsTree + 0.1)) { largestBranch = son(indexOfLargestBranch); m_sons = largestBranch.m_sons; m_localModel = largestBranch.localModel(); m_isLeaf = largestBranch.m_isLeaf; newDistribution(m_train); prune(); } } }
From source file:j48.C45Split.java
License:Open Source License
/** * Creates split on numeric attribute.//from w ww .j a v a2 s .com * * @exception Exception * if something goes wrong */ private void handleNumericAttribute(Instances trainInstances) throws Exception { int firstMiss; int next = 1; int last = 0; int splitIndex = -1; double currentInfoGain; double defaultEnt; double minSplit; Instance instance; int i; // Current attribute is a numeric attribute. m_distribution = new Distribution(2, trainInstances.numClasses()); // Only Instances with known values are relevant. Enumeration enu = trainInstances.enumerateInstances(); i = 0; while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (instance.isMissing(m_attIndex)) break; m_distribution.add(1, instance); i++; } firstMiss = i; // Compute minimum number of Instances required in each // subset. minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses()); if (Utils.smOrEq(minSplit, m_minNoObj)) minSplit = m_minNoObj; else if (Utils.gr(minSplit, 25)) minSplit = 25; // Enough Instances with known values? if (Utils.sm((double) firstMiss, 2 * minSplit)) return; // Compute values of criteria for all possible split // indices. defaultEnt = infoGainCrit.oldEnt(m_distribution); while (next < firstMiss) { if (trainInstances.instance(next - 1).value(m_attIndex) + 1e-5 < trainInstances.instance(next) .value(m_attIndex)) { // Move class values for all Instances up to next // possible split point. m_distribution.shiftRange(1, 0, trainInstances, last, next); // Check if enough Instances in each subset and compute // values for criteria. if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit)) { currentInfoGain = infoGainCrit.splitCritValue1(m_distribution, m_sumOfWeights, defaultEnt, rrrrr); if (Utils.gr(currentInfoGain, m_infoGain)) { m_infoGain = currentInfoGain; splitIndex = next - 1; } m_index++; } last = next; } next++; } // Was there any useful split? 
if (m_index == 0) return; // Compute modified information gain for best split. m_infoGain = m_infoGain - (Utils.log2(m_index) / m_sumOfWeights); if (Utils.smOrEq(m_infoGain, 0)) return; // Set instance variables' values to values for // best split. m_numSubsets = 2; m_splitPoint = (trainInstances.instance(splitIndex + 1).value(m_attIndex) + trainInstances.instance(splitIndex).value(m_attIndex)) / 2; // In case we have a numerical precision problem we need to choose the // smaller value if (m_splitPoint == trainInstances.instance(splitIndex + 1).value(m_attIndex)) { m_splitPoint = trainInstances.instance(splitIndex).value(m_attIndex); } // Restore distributioN for best split. m_distribution = new Distribution(2, trainInstances.numClasses()); m_distribution.addRange(0, trainInstances, 0, splitIndex + 1); m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss); // Compute modified gain ratio for best split. m_gainRatio = gainRatioCrit.splitCritValue1(m_distribution, m_sumOfWeights, m_infoGain, lllll); }
From source file:j48.C45Split.java
License:Open Source License
/**
 * Sets split point to greatest value in given data smaller or equal to old
 * split point. (C4.5 does this for some strange reason).
 *
 * <p>Nothing happens when the attribute is not numeric or when there is
 * at most one subset.
 *
 * @param allInstances the data whose attribute values are searched
 */
public final void setSplitPoint(Instances allInstances) {
  if (!allInstances.attribute(m_attIndex).isNumeric() || m_numSubsets <= 1) {
    return;
  }

  double best = -Double.MAX_VALUE;
  Enumeration enu = allInstances.enumerateInstances();
  while (enu.hasMoreElements()) {
    Instance current = (Instance) enu.nextElement();
    if (current.isMissing(m_attIndex)) {
      continue;
    }
    double candidate = current.value(m_attIndex);
    // Keep the largest value seen so far that does not exceed the old
    // split point.
    if (Utils.gr(candidate, best) && Utils.smOrEq(candidate, m_splitPoint)) {
      best = candidate;
    }
  }
  m_splitPoint = best;
}
From source file:j48.C45Split.java
License:Open Source License
/**
 * Returns index of subset instance is assigned to. Returns -1 if instance
 * is assigned to more than one subset.
 *
 * @param instance the instance to assign
 * @return -1 if the attribute value is missing; for a nominal attribute
 *         the attribute value cast to int; otherwise 0 or 1
 * @exception Exception
 *                if something goes wrong
 */
public final int whichSubset(Instance instance) throws Exception {
  if (instance.isMissing(m_attIndex)) {
    return -1;
  }

  if (instance.attribute(m_attIndex).isNominal()) {
    // Nominal case: the value itself selects the subset.
    return (int) instance.value(m_attIndex);
  }

  // Numeric case: values up to the split point go to subset 0.
  return Utils.smOrEq(instance.value(m_attIndex), m_splitPoint) ? 0 : 1;
}