Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find example usages of the weka.core Instances attribute method.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Prints the condition satisfied by instances in a subset.
 *
 * @param index index of the subset; 0 selects the first branch, any other
 *              value selects the second branch
 * @param data the training set, used to look up the split attribute
 * @return the right-hand side of the test, e.g. {@code " <= 3.5"}
 */
public final String rightSide(int index, Instances data) {

    final boolean firstSubset = (index == 0);

    if (!data.attribute(m_attIndex).isNominal()) {
        // Non-nominal attribute: threshold test against the raw split point.
        return (firstSubset ? " <= " : " > ") + m_splitPoint;
    }

    // Nominal attribute: the split point is used as the index of the tested value.
    final String testedValue = data.attribute(m_attIndex).value((int) m_splitPoint);
    return (firstSubset ? " = " : " != ") + testedValue;
}

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Returns a string containing java source code equivalent to the test
 * made at this node. The instance being tested is called "i".
 *
 * @param index index of the subset: a negative value yields a null test,
 *              0 yields the test for the first branch, anything else the
 *              test for the second branch
 * @param data the data containing instance structure info
 * @return a value of type 'String'
 */
public final String sourceExpression(int index, Instances data) {

    if (index < 0) {
        return "i[" + m_attIndex + "] == null";
    }

    final boolean firstSubset = (index == 0);
    final StringBuilder expr = new StringBuilder();

    if (data.attribute(m_attIndex).isNominal()) {
        // Second branch is the negation of the equality test.
        expr.append(firstSubset ? "i[" : "!i[");
        expr.append(m_attIndex).append("]");
        expr.append(".equals(\"");
        expr.append(data.attribute(m_attIndex).value((int) m_splitPoint));
        expr.append("\")");
    } else {
        expr.append("((Double) i[").append(m_attIndex).append("])");
        expr.append(firstSubset ? ".doubleValue() <= " : ".doubleValue() > ");
        expr.append(m_splitPoint);
    }
    return expr.toString();
}

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Sets split point to greatest value in given data smaller or equal to
 * old split point.
 * (C4.5 does this for some strange reason).
 *
 * @param allInstances the instances scanned for the replacement split point
 */
public final void setSplitPoint(Instances allInstances) {

    // Nothing to adjust for nominal attributes or when there is no real split.
    if (allInstances.attribute(m_attIndex).isNominal() || !(m_numSubsets > 1)) {
        return;
    }

    double largestBelow = -Double.MAX_VALUE;
    for (Enumeration remaining = allInstances.enumerateInstances(); remaining.hasMoreElements();) {
        Instance current = (Instance) remaining.nextElement();
        if (current.isMissing(m_attIndex)) {
            continue;
        }
        double candidate = current.value(m_attIndex);
        if (Utils.gr(candidate, largestBelow) && Utils.smOrEq(candidate, m_splitPoint)) {
            largestBelow = candidate;
        }
    }
    m_splitPoint = largestBelow;
}

From source file:j48.C45ModelSelection.java

License:Open Source License

/**
 * Selects C4.5-type split for the given dataset.
 *
 * @param data the instances to find a split for
 * @return the no-split model when the data is too small, pure, or no useful
 *         split exists; otherwise the split with the highest gain ratio among
 *         those with at least (roughly) average information gain; or
 *         {@code null} if an exception was raised (its stack trace is printed)
 */
public final ClassifierSplitModel selectModel(Instances data) {

    try {

        // Do not split when there are not enough instances or when all
        // instances already belong to a single class.
        Distribution classCounts = new Distribution(data);
        NoSplit noSplitModel = new NoSplit(classCounts);
        if (Utils.sm(classCounts.total(), 2 * m_minNoObj)
                || Utils.eq(classCounts.total(), classCounts.perClass(classCounts.maxClass()))) {
            return noSplitModel;
        }

        // multiVal stays true only if every attribute is nominal and has
        // a lot of values relative to the size of the full data.
        boolean multiVal = true;
        if (m_allData != null) {
            for (Enumeration atts = data.enumerateAttributes(); atts.hasMoreElements();) {
                Attribute candidate = (Attribute) atts.nextElement();
                if (candidate.isNumeric() || Utils.sm((double) candidate.numValues(),
                        (0.3 * (double) m_allData.numInstances()))) {
                    multiVal = false;
                    break;
                }
            }
        }

        C45Split[] models = new j48.C45Split[data.numAttributes()];
        double sumOfWeights = data.sumOfWeights();
        double infoGainSum = 0;
        int validModels = 0;

        // Build one candidate split per non-class attribute.
        for (int att = 0; att < data.numAttributes(); att++) {

            if (att == (data).classIndex()) {
                models[att] = null;
                continue;
            }

            models[att] = new j48.C45Split(att, m_minNoObj, sumOfWeights);
            models[att].buildClassifier(data);

            if (!models[att].checkModel()) {
                continue;
            }

            // A useful split counts towards the average unless it is on an
            // enumerated attribute with a lot of values (only checked when
            // the full data is available).
            boolean countsTowardsAverage = (m_allData == null)
                    || data.attribute(att).isNumeric()
                    || multiVal
                    || Utils.sm((double) data.attribute(att).numValues(),
                            (0.3 * (double) m_allData.numInstances()));
            if (countsTowardsAverage) {
                infoGainSum = infoGainSum + models[att].infoGain();
                validModels++;
            }
        }

        // Check if any useful split was found.
        if (validModels == 0) {
            return noSplitModel;
        }
        double averageInfoGain = infoGainSum / (double) validModels;

        // Find "best" attribute to split on.
        C45Split bestModel = null;
        double bestGainRatio = 0;
        for (int att = 0; att < data.numAttributes(); att++) {
            if (att == (data).classIndex() || !models[att].checkModel()) {
                continue;
            }
            // Use 1E-3 here to get a closer approximation to the original
            // implementation.
            if ((models[att].infoGain() >= (averageInfoGain - 1E-3))
                    && Utils.gr(models[att].gainRatio(), bestGainRatio)) {
                bestModel = models[att];
                bestGainRatio = models[att].gainRatio();
            }
        }

        // Check if useful split was found.
        if (Utils.eq(bestGainRatio, 0)) {
            return noSplitModel;
        }

        // Add all Instances with unknown values for the corresponding
        // attribute to the distribution for the model, so that
        // the complete distribution is stored with the model.
        bestModel.distribution().addInstWithUnknown(data, bestModel.attIndex());

        // Set the split point analogue to C45 if attribute numeric.
        if (m_allData != null) {
            bestModel.setSplitPoint(m_allData);
        }
        return bestModel;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}

From source file:j48.C45PruneableClassifierTreeG.java

License:Open Source License

/**
 * Finds new nodes that improve accuracy and grafts them onto the tree.
 *
 * <p>The instances are first sorted into those at this leaf ({@code l}) and
 * those in the "atbop" set ({@code n}); as a side effect the weights in
 * {@code iindex[0]} and {@code iindex[1]} are compacted in place so that they
 * line up with the positions in {@code l} and {@code n}. Candidate tests are
 * then collected per attribute, sorted, filtered, built on {@code l}, and
 * handed to the parent via {@code setDescendents}.
 *
 * <p>If constructing a {@code GraftSplit} fails, the error is written to
 * {@code System.err} and the JVM is terminated with {@code System.exit(1)}.
 *
 * @param fulldata the instances in whole trainset
 * @param iindex records num tests each instance has failed up to this node;
 *               row 0 is used as the instance weight at the leaf, row 1 as
 *               the instance weight in atbop (modified in place)
 * @param limits the upper/lower limits for numeric attributes
 *               ({@code limits[a][0]} lower, {@code limits[a][1]} upper); for
 *               a nominal attribute {@code limits[a][1] == 1} is treated as
 *               "already used"
 * @param parent the node immediately before the current one; cast to
 *               {@code C45PruneableClassifierTreeG} when the grafts are attached
 * @param pLaplace laplace for leaf, calculated by parent (in case leaf empty)
 * @param pLeafClass class of leaf, determined by parent (in case leaf empty)
 * @throws Exception declared by the signature; presumably propagated from the
 *                   helper calls (e.g. {@code setDescendents}) — TODO confirm
 */
private void findGraft(Instances fulldata, double[][] iindex, double[][] limits, ClassifierTree parent,
        double pLaplace, int pLeafClass) throws Exception {

    // get the class for this leaf
    int leafClass = (m_isEmpty) ? pLeafClass : localModel().distribution().maxClass();

    // get the laplace value for this leaf
    double leafLaplace = (m_isEmpty) ? pLaplace : laplaceLeaf(leafClass);

    // sort the instances into those at the leaf, those in atbop, and discarded
    Instances l = new Instances(fulldata, fulldata.numInstances());
    Instances n = new Instances(fulldata, fulldata.numInstances());
    int lcount = 0;
    int acount = 0;
    for (int x = 0; x < fulldata.numInstances(); x++) {
        // no weight in either set: the instance is discarded
        if (iindex[0][x] <= 0 && iindex[1][x] <= 0)
            continue;
        // NOTE(review): this tests != 0 while the atbop test below uses > 0 — confirm intended
        if (iindex[0][x] != 0) {
            l.add(fulldata.instance(x));
            l.instance(lcount).setWeight(iindex[0][x]);
            // move instance's weight in iindex to same index as in l
            iindex[0][lcount++] = iindex[0][x];
        }
        if (iindex[1][x] > 0) {
            n.add(fulldata.instance(x));
            n.instance(acount).setWeight(iindex[1][x]);
            // move instance's weight in iindex to same index as in n
            iindex[1][acount++] = iindex[1][x];
        }
    }

    // quick feasibility check: accumulate the weighted class distribution of atbop
    boolean graftPossible = false;
    double[] classDist = new double[n.numClasses()];
    for (int x = 0; x < n.numInstances(); x++) {
        if (iindex[1][x] > 0 && !n.instance(x).classIsMissing())
            classDist[(int) n.instance(x).classValue()] += iindex[1][x];
    }

    // a graft is only possible if some class other than the leaf class could
    // beat the leaf's laplace; the same count is passed as both the positive
    // and the total argument of biprob here
    // NOTE(review): looks like a deliberate best-case estimate — confirm
    for (int cVal = 0; cVal < n.numClasses(); cVal++) {
        double theLaplace = (classDist[cVal] + 1.0) / (classDist[cVal] + 2.0);
        if (cVal != leafClass && (theLaplace > leafLaplace)
                && (biprob(classDist[cVal], classDist[cVal], leafLaplace) > m_BiProbCrit)) {
            graftPossible = true;
            break;
        }
    }

    if (!graftPossible) {
        return;
    }

    // 1. Initialize to {} a set of tuples t containing potential tests
    ArrayList t = new ArrayList();

    // go through each attribute
    for (int a = 0; a < n.numAttributes(); a++) {
        if (a == n.classIndex())
            continue; // skip the class

        // sort instances in atbop by $a
        int[] sorted = sortByAttribute(n, a);

        // 2. For each continuous attribute $a:
        if (n.attribute(a).isNumeric()) {

            // find min and max values for this attribute at the leaf
            boolean prohibited = false;
            double minLeaf = Double.POSITIVE_INFINITY;
            double maxLeaf = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < l.numInstances(); i++) {
                // a leaf-class instance with a missing value rules this attribute out
                if (l.instance(i).isMissing(a)) {
                    if (l.instance(i).classValue() == leafClass) {
                        prohibited = true;
                        break;
                    }
                }
                double value = l.instance(i).value(a);
                if (!m_relabel || l.instance(i).classValue() == leafClass) {
                    if (value < minLeaf)
                        minLeaf = value;
                    if (value > maxLeaf)
                        maxLeaf = value;
                }
            }
            if (prohibited) {
                continue;
            }

            // (a) find values of
            //    $n: instances in atbop (already have that, actually)
            //    $v: a value for $a that exists for a case in the atbop, where
            //       $v is < the min value for $a for a case at the leaf which
            //       has the class $c, and $v is > the lowerlimit of $a at
            //       the leaf.
            //       (note: error in original paper stated that $v must be
            //       smaller OR EQUAL TO the min value).
            //    $k: $k is a class
            //  that maximize L' = Laplace({$x: $x contained in cases($n)
            //    & value($a,$x) <= $v & value($a,$x) > lowerlim($l,$a)}, $k).
            double minBestClass = Double.NaN;
            double minBestLaplace = leafLaplace;
            double minBestVal = Double.NaN;
            double minBestPos = Double.NaN;
            double minBestTotal = Double.NaN;
            double[][] minBestCounts = null;
            double[][] counts = new double[2][n.numClasses()];
            for (int x = 0; x < n.numInstances(); x++) {
                if (n.instance(sorted[x]).isMissing(a))
                    break; // missing are sorted to end: no more valid vals

                double theval = n.instance(sorted[x]).value(a);
                if (m_Debug)
                    System.out.println("\t " + theval);

                if (theval <= limits[a][0]) {
                    if (m_Debug)
                        System.out.println("\t  <= lowerlim: continuing...");
                    continue;
                }
                // note: error in paper would have this read "theval > minLeaf"
                if (theval >= minLeaf) {
                    if (m_Debug)
                        System.out.println("\t  >= minLeaf; breaking...");
                    break;
                }
                counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                // absorb the run of instances sharing this value, advancing x past them
                if (x != n.numInstances() - 1) {
                    int z = x + 1;
                    while (z < n.numInstances() && n.instance(sorted[z]).value(a) == theval) {
                        z++;
                        x++;
                        counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                    }
                }

                // work out the best laplace/class (for <= theval)
                double total = Utils.sum(counts[0]);
                for (int c = 0; c < n.numClasses(); c++) {
                    double temp = (counts[0][c] + 1.0) / (total + 2.0);
                    if (temp > minBestLaplace) {
                        minBestPos = counts[0][c];
                        minBestTotal = total;
                        minBestLaplace = temp;
                        minBestClass = c;
                        minBestCounts = copyCounts(counts);

                        // cut point: midway to the next sorted value, or the value itself at the end
                        minBestVal = (x == n.numInstances() - 1) ? theval
                                : ((theval + n.instance(sorted[x + 1]).value(a)) / 2.0);
                    }
                }
            }

            // (b) add to t tuple <n,a,v,k,L',"<=">
            if (!Double.isNaN(minBestVal) && biprob(minBestPos, minBestTotal, leafLaplace) > m_BiProbCrit) {
                GraftSplit gsplit = null;
                try {
                    gsplit = new GraftSplit(a, minBestVal, 0, leafClass, minBestCounts);
                } catch (Exception e) {
                    System.err.println("graftsplit error: " + e.getMessage());
                    System.exit(1);
                }
                t.add(gsplit);
            }
            // free space
            minBestCounts = null;

            // (c) find values of
            //    n: instances in atbop (already have that, actually)
            //    $v: a value for $a that exists for a case in the atbop, where
            //       $v is > the max value for $a for a case at the leaf which
            //       has the class $c, and $v is <= the upperlimit of $a at
            //       the leaf.
            //    k: k is a class
            //   that maximize L' = Laplace({x: x contained in cases(n)
            //       & value(a,x) > v & value(a,x) <= upperlim(l,a)}, k).
            double maxBestClass = -1;
            double maxBestLaplace = leafLaplace;
            double maxBestVal = Double.NaN;
            double maxBestPos = Double.NaN;
            double maxBestTotal = Double.NaN;
            double[][] maxBestCounts = null;
            for (int c = 0; c < n.numClasses(); c++) { // zero the counts
                counts[0][c] = 0;
                counts[1][c] = 0; // shouldn't need to do this ...
            }

            // check smallest val for a in atbop is < upper limit
            if (n.numInstances() >= 1 && n.instance(sorted[0]).value(a) < limits[a][1]) {
                // walk the sorted instances from the largest value downwards
                for (int x = n.numInstances() - 1; x >= 0; x--) {
                    if (n.instance(sorted[x]).isMissing(a))
                        continue;

                    double theval = n.instance(sorted[x]).value(a);
                    if (m_Debug)
                        System.out.println("\t " + theval);

                    if (theval > limits[a][1]) {
                        if (m_Debug)
                            System.out.println("\t  >= upperlim; continuing...");
                        continue;
                    }
                    if (theval <= maxLeaf) {
                        if (m_Debug)
                            System.out.println("\t  < maxLeaf; breaking...");
                        break;
                    }

                    // increment counts
                    counts[1][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                    // absorb the run of instances sharing this value, moving x down past them
                    if (x != 0 && !n.instance(sorted[x - 1]).isMissing(a)) {
                        int z = x - 1;
                        while (z >= 0 && n.instance(sorted[z]).value(a) == theval) {
                            z--;
                            x--;
                            counts[1][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                        }
                    }

                    // work out best laplace for > theval
                    double total = Utils.sum(counts[1]);
                    for (int c = 0; c < n.numClasses(); c++) {
                        double temp = (counts[1][c] + 1.0) / (total + 2.0);
                        if (temp > maxBestLaplace) {
                            maxBestPos = counts[1][c];
                            maxBestTotal = total;
                            maxBestLaplace = temp;
                            maxBestClass = c;
                            maxBestCounts = copyCounts(counts);
                            // cut point: midway to the previous sorted value, or the value itself at the start
                            maxBestVal = (x == 0) ? theval
                                    : ((theval + n.instance(sorted[x - 1]).value(a)) / 2.0);
                        }
                    }
                }

                // (d) add to t tuple <n,a,v,k,L',">">
                if (!Double.isNaN(maxBestVal) && biprob(maxBestPos, maxBestTotal, leafLaplace) > m_BiProbCrit) {
                    GraftSplit gsplit = null;
                    try {
                        gsplit = new GraftSplit(a, maxBestVal, 1, leafClass, maxBestCounts);
                    } catch (Exception e) {
                        System.err.println("graftsplit error:" + e.getMessage());
                        System.exit(1);
                    }
                    t.add(gsplit);
                }
            }
        } else { // must be a nominal attribute

            // 3. for each discrete attribute a for which there is no
            //    test at an ancestor of l

            // skip if this attribute has already been used
            if (limits[a][1] == 1) {
                continue;
            }

            // a value is prohibited when a leaf instance (of the leaf class,
            // if relabelling is on) has that value or a missing value
            boolean[] prohibit = new boolean[l.attribute(a).numValues()];
            for (int aval = 0; aval < n.attribute(a).numValues(); aval++) {
                for (int x = 0; x < l.numInstances(); x++) {
                    if ((l.instance(x).isMissing(a) || l.instance(x).value(a) == aval)
                            && (!m_relabel || (l.instance(x).classValue() == leafClass))) {
                        prohibit[aval] = true;
                        break;
                    }
                }
            }

            // (a) find values of
            //       $n: instances in atbop (already have that, actually)
            //       $v: $v is a value for $a
            //       $k: $k is a class
            //     that maximize L' = Laplace({$x: $x contained in cases($n)
            //           & value($a,$x) = $v}, $k).
            double bestVal = Double.NaN;
            double bestClass = Double.NaN;
            double bestLaplace = leafLaplace;
            double[][] bestCounts = null;
            double[][] counts = new double[2][n.numClasses()];

            for (int x = 0; x < n.numInstances(); x++) {
                if (n.instance(sorted[x]).isMissing(a))
                    continue;

                // zero the counts
                for (int c = 0; c < n.numClasses(); c++)
                    counts[0][c] = 0;

                double theval = n.instance(sorted[x]).value(a);
                counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                // absorb the run of instances sharing this value, advancing x past them
                if (x != n.numInstances() - 1) {
                    int z = x + 1;
                    while (z < n.numInstances() && n.instance(sorted[z]).value(a) == theval) {
                        z++;
                        x++;
                        counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                    }
                }

                if (!prohibit[(int) theval]) {
                    // work out best laplace for = theval (best is reset per value)
                    double total = Utils.sum(counts[0]);
                    bestLaplace = leafLaplace;
                    bestClass = Double.NaN;
                    for (int c = 0; c < n.numClasses(); c++) {
                        double temp = (counts[0][c] + 1.0) / (total + 2.0);
                        if (temp > bestLaplace && biprob(counts[0][c], total, leafLaplace) > m_BiProbCrit) {
                            bestLaplace = temp;
                            bestClass = c;
                            bestVal = theval;
                            bestCounts = copyCounts(counts);
                        }
                    }
                    // add to graft list
                    if (!Double.isNaN(bestClass)) {
                        GraftSplit gsplit = null;
                        try {
                            gsplit = new GraftSplit(a, bestVal, 2, leafClass, bestCounts);
                        } catch (Exception e) {
                            System.err.println("graftsplit error: " + e.getMessage());
                            System.exit(1);
                        }
                        t.add(gsplit);
                    }
                }
            }
            // (b) add to t tuple <n,a,v,k,L',"=">
            // done this already
        }
    }

    // 4. remove from t all tuples <n,a,v,c,L,x> such that L <=
    //    Laplace(cases(l),c) or prob(x,n,Laplace(cases(l),c) <= 0.05
    //      -- checked this constraint prior to adding a tuple --

    // *** step six done before step five for efficiency ***
    // 6. for each <n,a,v,k,L,x> in t ordered on L from highest to lowest
    // order the tuples from highest to lowest laplace
    // (this actually orders lowest to highest)
    Collections.sort(t);

    // 5. remove from t all tuples <n,a,v,c,L,x> such that there is
    //    no tuple <n',a',v',k',L',x'> such that k' != c & L' < L.
    // (implemented as: drop leading grafts that predict the leaf class)
    for (int x = 0; x < t.size(); x++) {
        GraftSplit gs = (GraftSplit) t.get(x);
        if (gs.maxClassForSubsetOfInterest() != leafClass) {
            break; // reached a graft with class != leafClass, so stop deleting
        } else {
            t.remove(x);
            x--; // stay on the same position after the removal
        }
    }

    // if no potential grafts were found, do nothing and return
    if (t.size() < 1) {
        return;
    }

    // create the distributions for each graft, working from the end of the
    // sorted list towards the front; a failure here is reported but not fatal
    for (int x = t.size() - 1; x >= 0; x--) {
        GraftSplit gs = (GraftSplit) t.get(x);
        try {
            gs.buildClassifier(l);
            gs.deleteGraftedCases(l); // so they don't go down the other branch
        } catch (Exception e) {
            System.err.println("graftsplit build error: " + e.getMessage());
        }
    }

    // add this stuff to the tree
    ((C45PruneableClassifierTreeG) parent).setDescendents(t, this);
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Creates a C4.5-type split on the given data.
 *
 * <p>Resets the split statistics, then delegates to the nominal or numeric
 * handler depending on the type of the split attribute. For a numeric
 * attribute the data is sorted on that attribute and the two tuning values
 * {@code rrrrr} and {@code lllll} are derived from the base-10 logarithm of
 * the attribute's standard deviation before the handler runs.
 *
 * @param trainInstances the training data; sorted in place when the split
 *                       attribute is not nominal
 * @throws Exception declared by the signature; presumably raised by the
 *                   attribute handlers — TODO confirm
 */
public void buildClassifier(Instances trainInstances) throws Exception {

    // Initialize the remaining instance variables.
    m_numSubsets = 0;
    m_splitPoint = Double.MAX_VALUE;
    m_infoGain = 0;
    m_gainRatio = 0;

    // Enumerated attributes: one potential subset per attribute value.
    if (trainInstances.attribute(m_attIndex).isNominal()) {
        m_complexityIndex = trainInstances.attribute(m_attIndex).numValues();
        m_index = m_complexityIndex;
        handleEnumeratedAttribute(trainInstances);
        return;
    }

    // Numeric attributes: binary split on sorted data.
    m_complexityIndex = 2;
    m_index = 0;
    trainInstances.sort(trainInstances.attribute(m_attIndex));

    // Both tuning values are scaled from log10(stdDev); which one receives
    // the larger scale depends on whether the spread exceeds 200.
    final double stdDev = trainInstances.attributeStats(m_attIndex).numericStats.stdDev;
    final double logStdDev = Math.log10(stdDev);
    final double coarse = logStdDev / 1.2;
    final double fine = logStdDev / 8;
    final boolean wideSpread = stdDev > 200;
    rrrrr = wideSpread ? coarse : fine;
    lllll = wideSpread ? fine : coarse;

    handleNumericAttribute(trainInstances);
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Prints the condition satisfied by instances in a subset.
 *
 * @param index
 *            index of the subset; for a nominal attribute it selects the
 *            attribute value, otherwise 0 means the "<=" branch and any
 *            other value the ">" branch
 * @param data
 *            training set.
 * @return the right-hand side of the test as text
 */
public final String rightSide(int index, Instances data) {

    if (data.attribute(m_attIndex).isNominal()) {
        return " = " + data.attribute(m_attIndex).value(index);
    }

    // Non-nominal attribute: compare against the split point, 6 decimals.
    final String relation = (index == 0) ? " <= " : " > ";
    return relation + Utils.doubleToString(m_splitPoint, 6);
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Returns a string containing java source code equivalent to the test made
 * at this node. The instance being tested is called "i".
 *
 * @param index
 *            index of the nominal value tested; a negative value yields a
 *            null test, and for a non-nominal attribute 0 selects the "<="
 *            branch and any other value the ">" branch
 * @param data
 *            the data containing instance structure info
 * @return a value of type 'String'
 */
public final String sourceExpression(int index, Instances data) {

    if (index < 0) {
        return "i[" + m_attIndex + "] == null";
    }

    final StringBuilder expr = new StringBuilder();
    if (!data.attribute(m_attIndex).isNominal()) {
        // Threshold comparison on the unboxed numeric value.
        expr.append("((Double) i[").append(m_attIndex).append("])");
        expr.append((index == 0) ? ".doubleValue() <= " : ".doubleValue() > ");
        expr.append(m_splitPoint);
        return expr.toString();
    }

    // Equality test against the label of the selected nominal value.
    expr.append("i[").append(m_attIndex).append("]");
    expr.append(".equals(\"");
    expr.append(data.attribute(m_attIndex).value(index));
    expr.append("\")");
    return expr.toString();
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Sets split point to greatest value in given data smaller or equal to old
 * split point. (C4.5 does this for some strange reason).
 *
 * @param allInstances the instances scanned for the replacement split point
 */
public final void setSplitPoint(Instances allInstances) {

    // Only numeric attributes with an actual split are adjusted.
    if (!allInstances.attribute(m_attIndex).isNumeric() || !(m_numSubsets > 1)) {
        return;
    }

    double largestBelow = -Double.MAX_VALUE;
    Enumeration remaining = allInstances.enumerateInstances();
    while (remaining.hasMoreElements()) {
        Instance current = (Instance) remaining.nextElement();
        if (current.isMissing(m_attIndex)) {
            continue;
        }
        double candidate = current.value(m_attIndex);
        boolean improves = Utils.gr(candidate, largestBelow) && Utils.smOrEq(candidate, m_splitPoint);
        if (improves) {
            largestBelow = candidate;
        }
    }
    m_splitPoint = largestBelow;
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Returns the minsAndMaxs of the index.th subset.
 *
 * <p>The given bounds are copied into a new array. For the split attribute,
 * a nominal attribute gets its upper entry set to 1, while a non-nominal
 * attribute gets entry {@code 1 - index} replaced by the split point (so
 * index 0 updates the upper entry and index 1 the lower entry).
 *
 * @param data the data supplying the number and type of attributes
 * @param minsAndMaxs the current bounds, one {@code [lower, upper]} pair per attribute
 * @param index the index of the subset
 * @return a new array holding the bounds for the given subset
 */
public final double[][] minsAndMaxs(Instances data, double[][] minsAndMaxs, int index) {

    double[][] subsetBounds = new double[data.numAttributes()][2];

    for (int att = 0; att < data.numAttributes(); att++) {
        subsetBounds[att][0] = minsAndMaxs[att][0];
        subsetBounds[att][1] = minsAndMaxs[att][1];

        if (att != m_attIndex) {
            continue;
        }
        if (data.attribute(m_attIndex).isNominal()) {
            subsetBounds[m_attIndex][1] = 1;
        } else {
            subsetBounds[m_attIndex][1 - index] = m_splitPoint;
        }
    }

    return subsetBounds;
}