Example usage for weka.core Utils maxIndex

List of usage examples for weka.core Utils maxIndex

Introduction

In this page you can find the example usage for weka.core Utils maxIndex.

Prototype

public static int maxIndex(int[] ints)

Source Link

Document

Returns index of maximum element in a given array of integers.

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Builds the bagged ensemble from labelled and unlabelled data and, when
 * requested, computes the out-of-bag error of the resulting ensemble.
 *
 * <p>Both datasets are copied into {@code m_data} and {@code m_unlabeledData}.
 * After {@code buildClassifiers()} has run, every ensemble member (cast to
 * {@code NewTree}) is asked for its transducted instances and then performs
 * induction on them. {@code m_data} is released before returning.
 *
 * @param data the training data to be used for generating the bagged
 *            classifier
 * @param p_unlabeledData the unlabelled instances; a copy is stored in
 *            {@code m_unlabeledData}
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {

    // Reject data the configured capabilities do not cover.
    getCapabilities().testWithFail(data);

    // Representing copies through weights needs a weight-aware base learner.
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }

    // Take fresh Instances objects for both datasets.
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);

    super.buildClassifier(m_data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    m_random = new Random(m_Seed);

    // In-bag flags are only tracked when the out-of-bag error is wanted.
    m_inBag = m_CalcOutOfBag ? new boolean[m_Classifiers.length][] : null;

    // The Randomizable test looks at the base learner, so it is the same for
    // every ensemble member and can guard the whole seeding loop.
    if (m_Classifier instanceof Randomizable) {
        for (int member = 0; member < m_Classifiers.length; member++) {
            ((Randomizable) m_Classifiers[member]).setSeed(m_random.nextInt());
        }
    }
    //Insert oracle loop here TODO

    buildClassifiers();

    // Scratch dataset, emptied and refilled for each tree.
    Instances transducted = new Instances(m_data);
    for (int member = 0; member < m_Classifiers.length; member++) {
        transducted.clear();
        ((NewTree) m_Classifiers[member]).GetTransductedInstances(transducted);
        ((NewTree) m_Classifiers[member]).DoInduction(transducted);
    }

    if (!getCalcOutOfBag()) {
        m_OutOfBagError = 0;
    } else {
        final boolean numeric = m_data.classAttribute().isNumeric();
        double oobWeight = 0.0;
        double errorTotal = 0.0;

        for (int idx = 0; idx < m_data.numInstances(); idx++) {
            // One slot for a numeric class, one slot per class value otherwise.
            double[] votes = new double[numeric ? 1 : m_data.numClasses()];

            // Collect predictions from the members that did not train on this instance.
            int voteCount = 0;
            for (int member = 0; member < m_Classifiers.length; member++) {
                if (!m_inBag[member][idx]) {
                    if (numeric) {
                        double pred = ((NewTree) m_Classifiers[member])
                                .classifyInstance(m_data.instance(idx));
                        if (!Utils.isMissingValue(pred)) {
                            votes[0] += pred;
                            voteCount++;
                        }
                    } else {
                        voteCount++;
                        double[] probs = ((NewTree) m_Classifiers[member])
                                .distributionForInstance(m_data.instance(idx));
                        // accumulate the probability estimates
                        for (int k = 0; k < probs.length; k++) {
                            votes[k] += probs[k];
                        }
                    }
                }
            }

            // Combine the collected votes into a single prediction.
            double vote;
            if (numeric) {
                vote = (voteCount == 0) ? Utils.missingValue() : votes[0] / voteCount;
            } else if (Utils.eq(Utils.sum(votes), 0)) {
                vote = Utils.missingValue();
            } else {
                vote = Utils.maxIndex(votes); // predicted class
                Utils.normalize(votes);
            }

            // Instances without a usable prediction or class value do not count.
            if (Utils.isMissingValue(vote) || m_data.instance(idx).classIsMissing()) {
                continue;
            }

            final double weight = m_data.instance(idx).weight();
            final double actual = m_data.instance(idx).classValue();
            oobWeight += weight;
            if (numeric) {
                // weighted absolute error relative to the actual class value
                errorTotal += (StrictMath.abs(vote - actual) * weight) / actual;
            } else if (vote != actual) {
                errorTotal += weight;
            }
        }

        if (oobWeight > 0) {
            m_OutOfBagError = errorTotal / oobWeight;
        }
    }

    // save memory
    m_data = null;
}

From source file:BaggingImprove.java

/**
 * Builds the bagged ensemble.
 *
 * <p>For every base classifier a bootstrap sample is drawn from a copy of
 * {@code data}, written to the file {@code Bootstrap.txt}, echoed to standard
 * output and used to train that classifier. When out-of-bag calculation is
 * enabled, the out-of-bag error is computed afterwards and stored in
 * {@code m_OutOfBagError}; otherwise that field is set to 0.
 *
 * @param data the training data to be used for generating the bagged
 *            classifier
 * @throws Exception if the classifier could not be built successfully, or
 *             if the bootstrap file cannot be written
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Work on a copy of the data. Instances with a missing class are kept:
    // deleteWithMissingClass() is intentionally not called here.
    data = new Instances(data);

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    System.out.println("Classifier length" + m_Classifiers.length);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    System.out.println("Bag Size " + bagSize);

    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag) {
        inBag = new boolean[m_Classifiers.length][];
    }

    // Every bootstrap sample is dumped to this file. The try/finally makes
    // sure the writer is closed even when resampling or training throws.
    BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt"));
    try {
        for (int j = 0; j < m_Classifiers.length; j++) {

            Instances bagData = null;

            // create the in-bag dataset
            if (m_CalcOutOfBag) {
                inBag[j] = new boolean[data.numInstances()];
                bagData = data.resampleWithWeights(random, inBag[j]);
            } else {
                System.out.println("Not m_Calc out of bag");
                System.out.println("Please configure code inside!");

                bagData = data.resampleWithWeights(random);
                if (bagSize < data.numInstances()) {
                    // shuffle, then keep only the first bagSize instances
                    bagData.randomize(random);
                    bagData = new Instances(bagData, 0, bagSize);
                }
            }

            if (m_Classifier instanceof Randomizable) {
                System.out.println("Randomizable");
                ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
            }

            // write bootstrap into file
            writer.write("Bootstrap " + j);
            writer.newLine();
            writer.write(bagData.toString());
            writer.newLine();

            System.out.println("Berhasil menyimpan bootstrap ke file ");

            // Parenthesised so the 1-based index is added numerically instead
            // of being concatenated as the two characters j and "1".
            System.out.println("Bootstrap " + (j + 1));

            // Echo the whole sample, starting with the first instance (index 0).
            for (int b = 0; b < bagData.numInstances(); b++) {
                System.out.println("" + bagData.instance(b));
            }

            // build the classifier
            m_Classifiers[j].buildClassifier(bagData);
        }
        writer.flush();
    } finally {
        writer.close();
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric) {
                votes = new double[1];
            } else {
                votes = new double[data.numClasses()];
            }

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                // only classifiers that did not train on this instance vote
                if (inBag[j][i]) {
                    continue;
                }
                voteCount++;
                if (numeric) {
                    // Accumulate the predictions; the sum is divided by
                    // voteCount below to obtain the average.
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // sum the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }
            System.out.println("Vote count " + voteCount);

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                // normalising an all-zero vector is not possible, so skip it then
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
                System.out.println("Vote " + vote);
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else if (vote != data.instance(i).classValue()) {
                System.out.println("Vote terakhir" + data.instance(i).classValue());
                errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Method for building an ID3Chi tree./*www .  ja va  2s .  c  om*/
 *
 * @param data
 *            the training data
 * @exception Exception
 *                if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    /*
    if (data.numInstances() == 0) {
       m_Attribute = null;
       m_ClassValue = Instance.missingValue();
       m_Distribution = new double[data.numClasses()];
       return;
    }
    /**/
    if (data.numInstances() == 0) {
        SetNullDistribution(data);
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    double entropyOfAllData = computeEntropy(data);

    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att, entropyOfAllData);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    double chiSquare = computeChiSquare(data, m_Attribute);

    int degreesOfFreedom = m_Attribute.numValues() - 1;
    ChiSquaredDistribution chi = new ChiSquaredDistribution(degreesOfFreedom);
    double threshold = chi.inverseCumulativeProbability(m_confidenceLevel);

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        MakeALeaf(data);
    } else {
        // Discard unknown values for selected attribute
        //data.deleteWithMissing(m_Attribute);
        Instances[] subset = splitData(data, m_Attribute);

        if (CheckIfCanApplyChiSquare(subset) && (chiSquare <= threshold)) {
            MakeALeaf(data);
            return;
        }

        m_Successors = new ID3Chi[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new ID3Chi(this.m_confidenceLevel);
            m_Successors[j].m_Ratio = (double) subset[j].numInstances() / (double) data.numInstances();
            m_Successors[j].makeTree(subset[j]);
        }
    }
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Turns this node into a leaf.
 *
 * <p>Instances with a missing value for {@code m_Attribute} are deleted from
 * {@code data} first. If nothing is left, {@code SetNullDistribution} is
 * called and the method returns. Otherwise the normalized class distribution
 * of the remaining instances is stored, the class with the largest share
 * becomes the class value, and {@code m_Attribute} is cleared.
 *
 * @param data the instances that reached this node; modified in place
 */
private void MakeALeaf(Instances data) {

    data.deleteWithMissing(m_Attribute);

    final int remaining = data.numInstances();
    if (remaining == 0) {
        SetNullDistribution(data);
        return;
    }

    // Count the instances per class value, then scale the counts to sum to one.
    m_Distribution = new double[data.numClasses()];
    for (int idx = 0; idx < remaining; idx++) {
        m_Distribution[(int) data.instance(idx).classValue()]++;
    }
    Utils.normalize(m_Distribution);

    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();

    // a null split attribute marks this node as a leaf
    m_Attribute = null;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Classifies a given test instance using the decision tree.
 *
 * <p>The prediction is the index of the largest entry in the distribution
 * returned by {@code classifyInstanceWithToken} for a token of 1.0.
 *
 * @param instance
 *            the instance to be classified
 * @return the classification
 */
public double classifyInstance(Instance instance) {
    return Utils.maxIndex(classifyInstanceWithToken(instance, 1.0));
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * <p>Steps, in order: copy the data and unset its class index; optionally
 * replace missing values; collect full-data statistics; pick up to
 * {@code m_NumClusters} distinct instances at random as initial centroids;
 * then alternate between assigning instances to clusters and recomputing
 * centroids until no assignment changes or {@code m_MaxIterations} is
 * reached. Finally the cluster sizes and, if requested, the per-cluster
 * standard deviations are stored.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    // NOTE(review): the same assignment is repeated at the very end of this
    // method; presumably one of the two was meant to set a different value —
    // confirm.
    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    // Work on a copy with no class attribute set.
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    // Statistics over the full dataset (missing counts, std devs, nominal counts).
    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    // updateClusterInfo is false here, so only the centroid values are computed.
    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            // every value of this attribute is missing
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            // "missing" occurs more often than the most frequent nominal value
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                                                     // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    // When order is preserved the assignment array is exposed via m_Assignments.
    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    // Keys of the instances already chosen as centroids; values are unused.
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    // The selection loop below swaps instances around. With m_PreserveOrder
    // it works on a copy; otherwise it reorders the working set itself.
    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    // Draw random instances from the not-yet-visited prefix [0, j]; an
    // instance becomes a centroid only if its hash key was not seen before.
    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        // move the drawn instance out of the prefix that is still sampled
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    // Fewer distinct instances than requested clusters: shrink the cluster count.
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    // tempI[c] collects the members of cluster c during one iteration.
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        // Assignment step: any changed assignment means another iteration is needed.
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                // with updateClusterInfo == true this also appends the new
                // centroid to m_ClusterCentroids
                moveCentroid(i, tempI[i], true);
            }
        }

        // Stop after the configured maximum number of iterations.
        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        // Empty clusters are dropped from the cluster count.
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                // Final iteration: compact tempI and the nominal counts so
                // that only the non-empty clusters remain, in order.
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                // Another iteration follows; tempI is refilled at its start.
                tempI = new Instances[m_NumClusters];
            }
        }

        // Reset the per-cluster accumulators before the next iteration.
        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // Final per-cluster statistics: sizes and, if requested, standard deviations.
    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    // no standard deviation for non-numeric attributes
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();

    // NOTE(review): identical to the assignment at the top of the method — confirm intent.
    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * Computes the coordinates of a centroid from the members of its cluster.
 *
 * <p>A coordinate is set to {@code members.meanOrMode(j)} when the attribute
 * is nominal or when the distance function is one of the ChEBI/GO/Calculus
 * variants; otherwise it keeps the array default of 0.0. When
 * {@code updateClusterInfo} is set, the per-cluster missing and nominal
 * counts are refreshed, a coordinate dominated by missing values is marked
 * as missing, and the centroid is appended to {@code m_ClusterCentroids}.
 *
 * @param centroidIndex index of the centroid whose coordinates will be
 *          computed
 * @param members the objects that are assigned to the cluster of this
 *          centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster
 *          arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    final int attributeCount = members.numAttributes();
    double[] centroid = new double[attributeCount];

    // The distance-function test is the same for every attribute, so it is
    // evaluated once up front.
    final boolean meanOrModeForAll = m_DistanceFunction instanceof ChEBIInd
            || m_DistanceFunction instanceof ChEBIDir
            || m_DistanceFunction instanceof GOInd
            || m_DistanceFunction instanceof GODir
            || m_DistanceFunction instanceof GOChEBIInd
            || m_DistanceFunction instanceof GOChEBIDir
            || m_DistanceFunction instanceof CalculusInd
            || m_DistanceFunction instanceof CalculusDir;

    for (int att = 0; att < attributeCount; att++) {

        if (meanOrModeForAll || members.attribute(att).isNominal()) {
            centroid[att] = members.meanOrMode(att);
        }

        if (!updateClusterInfo) {
            continue;
        }

        m_ClusterMissingCounts[centroidIndex][att] = members.attributeStats(att).missingCount;
        m_ClusterNominalCounts[centroidIndex][att] = members.attributeStats(att).nominalCounts;

        final int missing = m_ClusterMissingCounts[centroidIndex][att];
        final boolean markMissing;
        if (members.attribute(att).isNominal()) {
            // "missing" is more frequent than the most frequent nominal value
            final int[] counts = m_ClusterNominalCounts[centroidIndex][att];
            markMissing = missing > counts[Utils.maxIndex(counts)];
        } else {
            // every member lacks a value for this attribute
            markMissing = missing == members.numInstances();
        }
        if (markMissing) {
            centroid[att] = Instance.missingValue();
        }
    }

    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, centroid));
    }
    return centroid;
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * return a string describing this clusterer
 * /*w  ww  . j a  v a 2s  .  co m*/
 * @return a description of the clusterer as a string
 */
@Override
public String toString() {
    if (m_ClusterCentroids == null) {
        return "No clusterer built yet!";
    }

    int maxWidth = 0;
    int maxAttWidth = 0;
    boolean containsNumeric = false;
    for (int i = 0; i < m_NumClusters; i++) {
        for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
            if (m_ClusterCentroids.attribute(j).name().length() > maxAttWidth) {
                maxAttWidth = m_ClusterCentroids.attribute(j).name().length();
            }
            if (m_ClusterCentroids.attribute(j).isNumeric()) {
                containsNumeric = true;
                double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) / Math.log(10.0);
                // System.err.println(m_ClusterCentroids.instance(i).value(j)+" "+width);
                if (width < 0) {
                    width = 1;
                }
                // decimal + # decimal places + 1
                width += 6.0;
                if ((int) width > maxWidth) {
                    maxWidth = (int) width;
                }
            }
        }
    }

    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            Attribute a = m_ClusterCentroids.attribute(i);
            for (int j = 0; j < m_ClusterCentroids.numInstances(); j++) {
                String val = a.value((int) m_ClusterCentroids.instance(j).value(i));
                if (val.length() > maxWidth) {
                    maxWidth = val.length();
                }
            }
            for (int j = 0; j < a.numValues(); j++) {
                String val = a.value(j) + " ";
                if (val.length() > maxAttWidth) {
                    maxAttWidth = val.length();
                }
            }
        }
    }

    if (m_displayStdDevs) {
        // check for maximum width of maximum frequency count
        for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                int maxV = Utils.maxIndex(m_FullNominalCounts[i]);
                /*
                 * int percent = (int)((double)m_FullNominalCounts[i][maxV] /
                 * Utils.sum(m_ClusterSizes) * 100.0);
                 */
                int percent = 6; // max percent width (100%)
                String nomV = "" + m_FullNominalCounts[i][maxV];
                // + " (" + percent + "%)";
                if (nomV.length() + percent > maxWidth) {
                    maxWidth = nomV.length() + 1;
                }
            }
        }
    }

    // check for size of cluster sizes
    for (int m_ClusterSize : m_ClusterSizes) {
        String size = "(" + m_ClusterSize + ")";
        if (size.length() > maxWidth) {
            maxWidth = size.length();
        }
    }

    if (m_displayStdDevs && maxAttWidth < "missing".length()) {
        maxAttWidth = "missing".length();
    }

    String plusMinus = "+/-";
    maxAttWidth += 2;
    if (m_displayStdDevs && containsNumeric) {
        maxWidth += plusMinus.length();
    }
    if (maxAttWidth < "Attribute".length() + 2) {
        maxAttWidth = "Attribute".length() + 2;
    }

    if (maxWidth < "Full Data".length()) {
        maxWidth = "Full Data".length() + 1;
    }

    if (maxWidth < "missing".length()) {
        maxWidth = "missing".length() + 1;
    }

    StringBuffer temp = new StringBuffer();
    // String naString = "N/A";

    /*
     * for (int i = 0; i < maxWidth+2; i++) { naString += " "; }
     */
    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + m_Iterations + "\n");

    if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir
            || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir
            || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir
            || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir) {
        temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));
    } else {
        temp.append("Sum of within cluster distances: " + Utils.sum(m_squaredErrors));
    }

    if (!m_dontReplaceMissing) {
        temp.append("\nMissing values globally replaced with mean/mode");
    }

    temp.append("\n\nCluster centroids:\n");
    temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true));

    temp.append("\n");
    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));

    temp.append(pad("Full Data", " ", maxWidth + 1 - "Full Data".length(), true));

    // cluster numbers
    for (int i = 0; i < m_NumClusters; i++) {
        String clustNum = "" + i;
        temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true));
    }
    temp.append("\n");

    // cluster sizes
    String cSize = "(" + Utils.sum(m_ClusterSizes) + ")";
    temp.append(pad(cSize, " ", maxAttWidth + maxWidth + 1 - cSize.length(), true));
    for (int i = 0; i < m_NumClusters; i++) {
        cSize = "(" + m_ClusterSizes[i] + ")";
        temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true));
    }
    temp.append("\n");

    temp.append(pad("", "=", maxAttWidth
            + (maxWidth * (m_ClusterCentroids.numInstances() + 1) + m_ClusterCentroids.numInstances() + 1),
            true));
    temp.append("\n");

    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        String attName = m_ClusterCentroids.attribute(i).name();
        temp.append(attName);
        for (int j = 0; j < maxAttWidth - attName.length(); j++) {
            temp.append(" ");
        }

        String strVal;
        String valMeanMode;
        // full data
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            if (m_FullMeansOrMediansOrModes[i] == -1) { // missing
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = m_ClusterCentroids.attribute(i).value((int) m_FullMeansOrMediansOrModes[i])),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        } else {
            if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = Utils.doubleToString(m_FullMeansOrMediansOrModes[i], maxWidth, 4).trim()),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        }
        temp.append(valMeanMode);

        for (int j = 0; j < m_NumClusters; j++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad(
                            (strVal = m_ClusterCentroids.attribute(i)
                                    .value((int) m_ClusterCentroids.instance(j).value(i))),
                            " ", maxWidth + 1 - strVal.length(), true);
                }
            } else {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad((strVal = Utils
                            .doubleToString(m_ClusterCentroids.instance(j).value(i), maxWidth, 4).trim()), " ",
                            maxWidth + 1 - strVal.length(), true);
                }
            }
            temp.append(valMeanMode);
        }
        temp.append("\n");

        if (m_displayStdDevs) {
            // Std devs/max nominal
            String stdDevVal = "";

            if (m_ClusterCentroids.attribute(i).isNominal()) {
                // Do the values of the nominal attribute
                Attribute a = m_ClusterCentroids.attribute(i);
                for (int j = 0; j < a.numValues(); j++) {
                    // full data
                    String val = "  " + a.value(j);
                    temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false));
                    int count = m_FullNominalCounts[i][j];
                    int percent = (int) ((double) m_FullNominalCounts[i][j] / Utils.sum(m_ClusterSizes)
                            * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterNominalCounts[k][i][j];
                        percent = (int) ((double) m_ClusterNominalCounts[k][i][j] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }
                    temp.append("\n");
                }
                // missing (if any)
                if (m_FullMissingCounts[i] > 0) {
                    // Full data
                    temp.append(pad("  missing", " ", maxAttWidth + 1 - "  missing".length(), false));
                    int count = m_FullMissingCounts[i];
                    int percent = (int) ((double) m_FullMissingCounts[i] / Utils.sum(m_ClusterSizes) * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterMissingCounts[k][i];
                        percent = (int) ((double) m_ClusterMissingCounts[k][i] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }

                    temp.append("\n");
                }

                temp.append("\n");
            } else {
                // Full data
                if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                    stdDevVal = pad("--", " ", maxAttWidth + maxWidth + 1 - 2, true);
                } else {
                    stdDevVal = pad(
                            (strVal = plusMinus + Utils.doubleToString(m_FullStdDevs[i], maxWidth, 4).trim()),
                            " ", maxWidth + maxAttWidth + 1 - strVal.length(), true);
                }
                temp.append(stdDevVal);

                // Clusters
                for (int j = 0; j < m_NumClusters; j++) {
                    if (m_ClusterCentroids.instance(j).isMissing(i)) {
                        stdDevVal = pad("--", " ", maxWidth + 1 - 2, true);
                    } else {
                        stdDevVal = pad((strVal = plusMinus + Utils
                                .doubleToString(m_ClusterStdDevs.instance(j).value(i), maxWidth, 4).trim()),
                                " ", maxWidth + 1 - strVal.length(), true);
                    }
                    temp.append(stdDevVal);
                }
                temp.append("\n\n");
            }
        }
    }

    temp.append("\n\n");
    return temp.toString();
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Evaluates the classifier on a single instance and records the prediction
 * (if the class is nominal)./*from   w w w.j  a v a2  s .c  o m*/
 * 
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the clasifier
 * @throws Exception if model could not be evaluated successfully or the data
 *           contains string attributes
 */
public double evaluateModelOnceAndRecordPrediction(List<LibSVM> classifier, List<Double> classifierWeight,
        Instance instance) throws Exception {
    Instance classMissing = (Instance) instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
        if (m_Predictions == null) {
            m_Predictions = new FastVector();
        }
        List<double[]> prob = new ArrayList<double[]>();//
        double[] finalProb = new double[instance.numClasses()];
        for (int i = 0; i < classifier.size(); i++) {
            double[] dist = classifier.get(i).distributionForInstance(classMissing);//
            prob.add(dist);
        }
        for (int i = 0; i < finalProb.length; i++) {
            for (int j = 0; j < classifier.size(); j++) {
                finalProb[i] += prob.get(j)[i] * classifierWeight.get(j);
            }
        }
        double sum = 0;
        for (int i = 0; i < finalProb.length; i++) {
            sum += finalProb[i];
        }
        for (int i = 0; i < finalProb.length; i++) {
            finalProb[i] = finalProb[i] / sum;
        }
        pred = Utils.maxIndex(finalProb);
        if (finalProb[(int) pred] <= 0) {
            pred = Instance.missingValue();
        }
        updateStatsForClassifier(finalProb, instance);
        m_Predictions.addElement(new NominalPrediction(instance.classValue(), finalProb, instance.weight()));
    } else {

        pred = classifier.get(0).classifyInstance(classMissing);
        updateStatsForPredictor(pred, instance);
    }
    return pred;
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Evaluates the classifier on a single instance.
 * /*  ww w.  j a va2  s.  co  m*/
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the clasifier
 * @throws Exception if model could not be evaluated successfully or the data
 *           contains string attributes
 */
public double evaluateModelOnce(Classifier classifier, Instance instance) throws Exception {

    Instance classMissing = (Instance) instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
        double[] dist = classifier.distributionForInstance(classMissing);
        pred = Utils.maxIndex(dist);
        if (dist[(int) pred] <= 0) {
            pred = Instance.missingValue();
        }
        updateStatsForClassifier(dist, instance);
    } else {
        pred = classifier.classifyInstance(classMissing);
        updateStatsForPredictor(pred, instance);
    }
    return pred;
}