Example usage for weka.core Utils normalize

List of usage examples for weka.core Utils normalize

Introduction

On this page you can find example usages of weka.core.Utils.normalize.

Prototype

public static void normalize(double[] doubles) 

Source Link

Document

Normalizes the doubles in the array by their sum.

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Bagging method./*from   w  w  w.j  ava  2 s .  c  o m*/
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Has user asked to represent copies using weights?
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }

    // get fresh Instances object
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);

    super.buildClassifier(m_data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    m_random = new Random(m_Seed);

    m_inBag = null;
    if (m_CalcOutOfBag)
        m_inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt());
        }
    }
    //Insert oracle loop here TODO

    buildClassifiers();
    Instances inst = new Instances(m_data);
    for (int i = 0; i < m_Classifiers.length; i++) {
        inst.clear();
        ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst);
        ((NewTree) m_Classifiers[i]).DoInduction(inst);
        // Ehm, do something boyski
    }
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = m_data.classAttribute().isNumeric();

        for (int i = 0; i < m_data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[m_data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (m_inBag[j][i])
                    continue;

                if (numeric) {
                    double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i));
                    if (!Utils.isMissingValue(pred)) {
                        votes[0] += pred;
                        voteCount++;
                    }
                } else {
                    voteCount++;
                    double[] newProbs = ((NewTree) m_Classifiers[j])
                            .distributionForInstance(m_data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                if (voteCount == 0) {
                    vote = Utils.missingValue();
                } else {
                    vote = votes[0] / voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    vote = Utils.missingValue();
                } else {
                    vote = Utils.maxIndex(votes); // predicted class
                    Utils.normalize(votes);
                }
            }

            // error for instance
            if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) {
                outOfBagCount += m_data.instance(i).weight();
                if (numeric) {
                    errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue())
                            * m_data.instance(i).weight()) / m_data.instance(i).classValue();
                } else {
                    if (vote != m_data.instance(i).classValue())
                        errorSum += m_data.instance(i).weight();
                }
            }
        }

        if (outOfBagCount > 0) {
            m_OutOfBagError = errorSum / outOfBagCount;
        }
    } else {
        m_OutOfBagError = 0;
    }

    // save memory
    m_data = null;
}

From source file:Bilbo.java

License:Open Source License

/**
 * Computes the class membership probabilities for the given test instance
 * by combining the predictions of the first m_NumIterations ensemble
 * members.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution; for a numeric class the
 * single entry holds the mean prediction, or the missing value if no member
 * produced a usable prediction
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    final double[] totals = new double[instance.numClasses()];
    final boolean numericClass = instance.classAttribute().isNumeric();

    // number of members that returned a non-missing numeric prediction
    double usablePredictions = 0;

    for (int member = 0; member < m_NumIterations; member++) {
        NewTree tree = (NewTree) m_Classifiers[member];
        if (numericClass) {
            double prediction = tree.classifyInstance(instance);
            if (!Utils.isMissingValue(prediction)) {
                totals[0] += prediction;
                usablePredictions++;
            }
        } else {
            double[] memberProbs = tree.distributionForInstance(instance);
            for (int c = 0; c < memberProbs.length; c++) {
                totals[c] += memberProbs[c];
            }
        }
    }

    if (numericClass) {
        totals[0] = (usablePredictions == 0) ? Utils.missingValue() : totals[0] / usablePredictions;
        return totals;
    }

    // an all-zero sum is returned unchanged; anything else is normalized
    if (!Utils.eq(Utils.sum(totals), 0)) {
        Utils.normalize(totals);
    }
    return totals;
}

From source file:BaggingImprove.java

/**
 * Bagging method. Trains every ensemble member on its own bootstrap sample,
 * dumps each sample to the file "Bootstrap.txt" and to standard output, and
 * optionally computes the out-of-bag error.
 *
 * @param data the training data to be used for generating the bagged
 * classifier.
 * @throws IllegalArgumentException if the out-of-bag error is requested
 * while the bag size is not 100%
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // work on a fresh Instances object; instances with a missing class are
    // NOT removed here
    data = new Instances(data);

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    System.out.println("Classifier length" + m_Classifiers.length);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    System.out.println("Bag Size " + bagSize);

    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag) {
        inBag = new boolean[m_Classifiers.length][];
    }

    // try-with-resources closes (and thereby flushes) the bootstrap dump even
    // when resampling or training throws
    try (BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt"))) {

        for (int j = 0; j < m_Classifiers.length; j++) {

            Instances bagData;

            // create the in-bag dataset
            if (m_CalcOutOfBag) {
                inBag[j] = new boolean[data.numInstances()];
                bagData = data.resampleWithWeights(random, inBag[j]);
            } else {
                System.out.println("Not m_Calc out of bag");
                System.out.println("Please configure code inside!");

                bagData = data.resampleWithWeights(random);
                if (bagSize < data.numInstances()) {
                    // shuffle, then keep only the first bagSize instances
                    bagData.randomize(random);
                    bagData = new Instances(bagData, 0, bagSize);
                }
            }

            if (m_Classifier instanceof Randomizable) {
                System.out.println("Randomizable");
                ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
            }

            // write bootstrap into file
            writer.write("Bootstrap " + j);
            writer.newLine();
            writer.write(bagData.toString());
            writer.newLine();

            System.out.println("Berhasil menyimpan bootstrap ke file ");

            // parenthesized so the 1-based number is added, not concatenated
            System.out.println("Bootstrap " + (j + 1));

            // print every instance of the bag, starting with the first one
            for (int b = 0; b < bagData.numInstances(); b++) {
                System.out.println("" + bagData.instance(b));
            }

            // build the classifier
            m_Classifiers[j].buildClassifier(bagData);
        }
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric) {
                votes = new double[1];
            } else {
                votes = new double[data.numClasses()];
            }

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                // members that had instance i in their bag do not vote
                if (inBag[j][i]) {
                    continue;
                }
                voteCount++;
                if (numeric) {
                    // accumulate, so that dividing by voteCount below yields the mean
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }
            System.out.println("Vote count " + voteCount);

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                // an all-zero vote vector is left as it is
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
                System.out.println("Vote " + vote);
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else if (vote != data.instance(i).classValue()) {
                System.out.println("Vote terakhir" + data.instance(i).classValue());
                errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}

From source file:BaggingImprove.java

/**
 * Calculates the class membership probabilities for the given test
 * instance by combining the predictions of the first m_NumIterations
 * ensemble members.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution; for a numeric class the
 * single entry holds the mean prediction
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    double[] sums = new double[instance.numClasses()], newProbs;

    for (int i = 0; i < m_NumIterations; i++) {
        if (instance.classAttribute().isNumeric()) {
            sums[0] += m_Classifiers[i].classifyInstance(instance);
        } else {
            newProbs = m_Classifiers[i].distributionForInstance(instance);
            // add this member's estimates to the running totals; without this
            // step sums stays all-zero and no prediction is ever produced
            for (int j = 0; j < newProbs.length; j++) {
                sums[j] += newProbs[j];
            }
        }
    }

    if (instance.classAttribute().isNumeric()) {
        sums[0] /= m_NumIterations;
        return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
        // nothing to normalize; hand back the all-zero vector
        return sums;
    } else {
        Utils.normalize(sums);
        return sums;
    }
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Returns the distribution for an instance.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution/* ww  w.  j  a v a2 s.  c om*/
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance inst) throws Exception {

    if (m_Classifiers.length == 1) {
        return m_Classifiers[0].distributionForInstance(inst);
    }

    double[] probs = new double[inst.numClasses()];

    if (m_Method == METHOD_1_AGAINST_1) {
        double[][] r = new double[inst.numClasses()][inst.numClasses()];
        double[][] n = new double[inst.numClasses()][inst.numClasses()];

        for (int i = 0; i < m_ClassFilters.length; i++) {
            if (m_Classifiers[i] != null) {
                Instance tempInst = (Instance) inst.copy();
                tempInst.setDataset(m_TwoClassDataset);
                double[] current = m_Classifiers[i].distributionForInstance(tempInst);
                Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
                range.setUpper(m_ClassAttribute.numValues());
                int[] pair = range.getSelection();
                if (m_pairwiseCoupling && inst.numClasses() > 2) {
                    r[pair[0]][pair[1]] = current[0];
                    n[pair[0]][pair[1]] = m_SumOfWeights[i];
                } else {
                    if (current[0] > current[1]) {
                        probs[pair[0]] += 1.0;
                    } else {
                        probs[pair[1]] += 1.0;
                    }
                }
            }
        }
        if (m_pairwiseCoupling && inst.numClasses() > 2) {
            return pairwiseCoupling(n, r);
        }
    } else {
        // error correcting style methods
        for (int i = 0; i < m_ClassFilters.length; i++) {
            m_ClassFilters[i].input(inst);
            m_ClassFilters[i].batchFinished();
            double[] current = m_Classifiers[i].distributionForInstance(m_ClassFilters[i].output());
            //Calibrate the binary classifier scores

            for (int j = 0; j < m_ClassAttribute.numValues(); j++) {
                if (((MakeIndicator) m_ClassFilters[i]).getValueRange().isInRange(j)) {
                    probs[j] += current[1];
                } else {
                    probs[j] += current[0];
                }
            }
        }
    }

    if (Utils.gr(Utils.sum(probs), 0)) {
        Utils.normalize(probs);
        return probs;
    } else {
        return m_ZeroR.distributionForInstance(inst);
    }
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Implements pairwise coupling.
 *
 * Only the entries above the diagonal (j &gt; i) of both matrices are read.
 *
 * @param n the sum of weights used to train each model
 * @param r the probability estimate from each model
 * @return the coupled estimates
 */
public static double[] pairwiseCoupling(double[][] n, double[][] r) {

    final int numClasses = r.length;

    // start from a uniform class estimate
    double[] p = new double[numClasses];
    for (int c = 0; c < numClasses; c++) {
        p[c] = 1.0 / (double) numClasses;
    }

    // pairwise estimates implied by p; only the upper triangle is used
    double[][] u = new double[numClasses][numClasses];
    for (int a = 0; a < numClasses; a++) {
        for (int b = a + 1; b < numClasses; b++) {
            u[a][b] = 0.5;
        }
    }

    // weighted sums of the observed estimates; constant across iterations
    double[] observed = new double[numClasses];
    for (int a = 0; a < numClasses; a++) {
        for (int b = a + 1; b < numClasses; b++) {
            observed[a] += n[a][b] * r[a][b];
            observed[b] += n[a][b] * (1 - r[a][b]);
        }
    }

    // repeat until one pass leaves every estimate (nearly) unchanged
    boolean changed = true;
    while (changed) {
        changed = false;

        // weighted sums of the current pairwise estimates
        double[] implied = new double[numClasses];
        for (int a = 0; a < numClasses; a++) {
            for (int b = a + 1; b < numClasses; b++) {
                implied[a] += n[a][b] * u[a][b];
                implied[b] += n[a][b] * (1 - u[a][b]);
            }
        }

        for (int c = 0; c < numClasses; c++) {
            final double previous = p[c];
            if ((observed[c] == 0) || (implied[c] == 0)) {
                p[c] = 0;
                if (previous > 0) {
                    changed = true;
                }
            } else {
                p[c] = previous * (observed[c] / implied[c]);
                if (Math.abs(previous - p[c]) > 1.0e-3) {
                    changed = true;
                }
            }
        }

        Utils.normalize(p);

        // refresh the pairwise estimates from the rescaled p
        for (int a = 0; a < numClasses; a++) {
            for (int b = a + 1; b < numClasses; b++) {
                u[a][b] = p[a] / (p[a] + p[b]);
            }
        }
    }
    return p;
}

From source file:SMO.java

License:Open Source License

/**
 * Estimates class probabilities for given instance.
 *
 * @param inst the instance to compute the probabilities for
 * @return the class distribution: normalized pairwise votes when no
 * logistic models are fitted, otherwise the logistic-model estimate
 * (coupled pairwise when there are more than two classes)
 * @throws Exception in case of an error
 */
public double[] distributionForInstance(Instance inst) throws Exception {

    // Pass the instance through each configured filter in turn
    if (!m_checksTurnedOff) {
        m_Missing.input(inst);
        m_Missing.batchFinished();
        inst = m_Missing.output();
    }
    if (m_NominalToBinary != null) {
        m_NominalToBinary.input(inst);
        m_NominalToBinary.batchFinished();
        inst = m_NominalToBinary.output();
    }
    if (m_Filter != null) {
        m_Filter.input(inst);
        m_Filter.batchFinished();
        inst = m_Filter.output();
    }

    final int numClasses = inst.numClasses();

    // Without logistic models: every trained pairwise machine casts one vote
    if (!m_fitLogisticModels) {
        double[] votes = new double[numClasses];
        for (int first = 0; first < numClasses; first++) {
            for (int second = first + 1; second < numClasses; second++) {
                boolean untrained = (m_classifiers[first][second].m_alpha == null)
                        && (m_classifiers[first][second].m_sparseWeights == null);
                if (untrained) {
                    continue;
                }
                // a positive output votes for the second class of the pair
                int winner = (m_classifiers[first][second].SVMOutput(-1, inst) > 0) ? second : first;
                votes[winner] += 1;
            }
        }
        Utils.normalize(votes);
        return votes;
    }

    // We only need to do pairwise coupling if there are more
    // than two classes.
    if (numClasses == 2) {
        double[] logisticInput = new double[2];
        logisticInput[0] = m_classifiers[0][1].SVMOutput(-1, inst);
        logisticInput[1] = Instance.missingValue();
        return m_classifiers[0][1].m_logistic.distributionForInstance(new Instance(1, logisticInput));
    }

    // Collect one logistic estimate and one weight sum per trained pair
    double[][] estimates = new double[numClasses][numClasses];
    double[][] weightSums = new double[numClasses][numClasses];
    for (int first = 0; first < numClasses; first++) {
        for (int second = first + 1; second < numClasses; second++) {
            boolean untrained = (m_classifiers[first][second].m_alpha == null)
                    && (m_classifiers[first][second].m_sparseWeights == null);
            if (untrained) {
                continue;
            }
            double[] logisticInput = new double[2];
            logisticInput[0] = m_classifiers[first][second].SVMOutput(-1, inst);
            logisticInput[1] = Instance.missingValue();
            estimates[first][second] = m_classifiers[first][second].m_logistic
                    .distributionForInstance(new Instance(1, logisticInput))[0];
            weightSums[first][second] = m_classifiers[first][second].m_sumOfWeights;
        }
    }
    return weka.classifiers.meta.MultiClassClassifier.pairwiseCoupling(weightSums, estimates);
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Turns this node into a leaf: stores the normalized class counts of the
 * given data as the node's distribution and the index of the largest entry
 * as its class value.
 *
 * @param data the instances reaching this node; deleteWithMissing is called
 * on it with m_Attribute, so the caller's object is modified
 */
private void MakeALeaf(Instances data) {

    data.deleteWithMissing(m_Attribute);

    final int remaining = data.numInstances();
    if (remaining == 0) {
        // nothing left to count
        SetNullDistribution(data);
        return;
    }

    // count how often each class value occurs
    m_Distribution = new double[data.numClasses()];
    for (int i = 0; i < remaining; i++) {
        m_Distribution[(int) data.instance(i).classValue()]++;
    }

    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();

    // set m_Attribute to null to mark this node as a leaf
    m_Attribute = null;
}

From source file:adams.flow.transformer.WekaInstancesInfo.java

License:Open Source License

/**
 * Executes the flow item./*from w ww. j a va2 s .c  o  m*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int labelIndex;
    double[] dist;
    Enumeration enm;
    int i;

    result = null;

    if (m_InputToken.getPayload() instanceof Instance)
        inst = ((Instance) m_InputToken.getPayload()).dataset();
    else
        inst = (Instances) m_InputToken.getPayload();
    m_AttributeIndex.setData(inst);
    index = m_AttributeIndex.getIntIndex();

    m_Queue.clear();

    switch (m_Type) {
    case FULL:
        m_Queue.add(inst.toSummaryString());
        break;

    case FULL_ATTRIBUTE:
        m_Queue.add(getAttributeStats(inst, index));
        break;

    case FULL_CLASS:
        if (inst.classIndex() > -1)
            m_Queue.add(getAttributeStats(inst, inst.classIndex()));
        break;

    case HEADER:
        m_Queue.add(new Instances(inst, 0).toString());
        break;

    case RELATION_NAME:
        m_Queue.add(inst.relationName());
        break;

    case ATTRIBUTE_NAME:
        if (index != -1)
            m_Queue.add(inst.attribute(index).name());
        break;

    case ATTRIBUTE_NAMES:
        for (i = 0; i < inst.numAttributes(); i++)
            m_Queue.add(inst.attribute(i).name());
        break;

    case LABELS:
        if (index != -1) {
            enm = inst.attribute(index).enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case CLASS_LABELS:
        if (inst.classIndex() > -1) {
            enm = inst.classAttribute().enumerateValues();
            while (enm.hasMoreElements())
                m_Queue.add(enm.nextElement());
        }
        break;

    case LABEL_COUNT:
        if (index > -1) {
            m_LabelIndex.setData(inst.attribute(index));
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]);
        }
        break;

    case LABEL_COUNTS:
        if (index > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts));
        break;

    case LABEL_DISTRIBUTION:
        if (index > -1) {
            dist = new double[inst.attributeStats(index).nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = inst.attributeStats(index).nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case CLASS_LABEL_COUNT:
        if (inst.classIndex() > -1) {
            m_LabelIndex.setData(inst.classAttribute());
            labelIndex = m_LabelIndex.getIntIndex();
            m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]);
        }
        break;

    case CLASS_LABEL_COUNTS:
        if (inst.classIndex() > -1)
            m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts));
        break;

    case CLASS_LABEL_DISTRIBUTION:
        if (inst.classIndex() > -1) {
            dist = new double[inst.attributeStats(inst.classIndex()).nominalCounts.length];
            for (i = 0; i < dist.length; i++)
                dist[i] = inst.attributeStats(inst.classIndex()).nominalCounts[i];
            Utils.normalize(dist);
            m_Queue.add(StatUtils.toNumberArray(dist));
        }
        break;

    case NUM_ATTRIBUTES:
        m_Queue.add(inst.numAttributes());
        break;

    case NUM_INSTANCES:
        m_Queue.add(inst.numInstances());
        break;

    case NUM_CLASS_LABELS:
        if ((inst.classIndex() != -1) && inst.classAttribute().isNominal())
            m_Queue.add(inst.classAttribute().numValues());
        break;

    case NUM_LABELS:
        if ((index != -1) && inst.attribute(index).isNominal())
            m_Queue.add(inst.attribute(index).numValues());
        break;

    case NUM_DISTINCT_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).distinctCount);
        break;

    case NUM_UNIQUE_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).uniqueCount);
        break;

    case NUM_MISSING_VALUES:
        if (index != -1)
            m_Queue.add(inst.attributeStats(index).missingCount);
        break;

    case MIN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.min);
        break;

    case MAX:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.max);
        break;

    case MEAN:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.mean);
        break;

    case STDEV:
        if ((index != -1) && inst.attribute(index).isNumeric())
            m_Queue.add(inst.attributeStats(index).numericStats.stdDev);
        break;

    case ATTRIBUTE_TYPE:
        if (index != -1)
            m_Queue.add(Attribute.typeToString(inst.attribute(index)));
        break;

    case CLASS_TYPE:
        if (inst.classIndex() != -1)
            m_Queue.add(Attribute.typeToString(inst.classAttribute()));
        break;

    default:
        result = "Unhandled info type: " + m_Type;
    }

    return result;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Generates the classifier: evaluates one split per non-class attribute,
 * keeps the one with the lowest criterion value, and stores its attribute
 * index, split point and class distributions.
 *
 * @param instances set of instances serving as training data
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(instances);
        return;
    }
    m_ZeroR = null;

    double[][] chosenDist = new double[3][instances.numClasses()];

    m_Instances = new Instances(instances);

    // a non-nominal class is tracked with a single distribution column
    final int numClasses = m_Instances.classAttribute().isNominal() ? m_Instances.numClasses() : 1;

    double lowestCriterion = Double.MAX_VALUE;
    double chosenSplit = -Double.MAX_VALUE;
    int chosenAttribute = -1; // -1 until the first candidate has been evaluated

    // For each attribute
    for (int att = 0; att < m_Instances.numAttributes(); att++) {
        if (att == m_Instances.classIndex()) {
            continue;
        }

        // Reserve space for distribution.
        m_Distribution = new double[3][numClasses];

        // Compute value of criterion for best split on attribute
        double criterion;
        if (m_Instances.attribute(att).isNominal()) {
            criterion = findSplitNominal(att);
        } else {
            criterion = findSplitNumeric(att);
        }

        // the first candidate is always accepted, later ones only if better
        if ((chosenAttribute == -1) || (criterion < lowestCriterion)) {
            lowestCriterion = criterion;
            chosenAttribute = att;
            chosenSplit = m_SplitPoint;
            for (int row = 0; row < 3; row++) {
                System.arraycopy(m_Distribution[row], 0, chosenDist[row], 0, numClasses);
            }
        }
    }

    // Set attribute, split point and distribution.
    m_AttIndex = chosenAttribute;
    m_SplitPoint = chosenSplit;
    m_Distribution = chosenDist;
    if (m_Instances.classAttribute().isNominal()) {
        for (int row = 0; row < m_Distribution.length; row++) {
            double rowTotal = Utils.sum(m_Distribution[row]);
            if (rowTotal == 0) {
                // This means there were only missing attribute values:
                // use row 2 in place of the empty row
                System.arraycopy(m_Distribution[2], 0, m_Distribution[row], 0, m_Distribution[2].length);
                Utils.normalize(m_Distribution[row]);
            } else {
                Utils.normalize(m_Distribution[row], rowTotal);
            }
        }
    }

    // Save memory
    m_Instances = new Instances(m_Instances, 0);
}