Example usage for weka.core Utils sum

List of usage examples for weka.core Utils sum

Introduction

In this page you can find the example usage for weka.core Utils sum.

Prototype

public static int sum(int[] ints)

Source Link

Document

Computes the sum of the elements of an array of integers.

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Bagging method: seeds and trains the ensemble, runs a
 * transduction/induction pass over the unlabeled data through the
 * NewTree members, and optionally estimates the out-of-bag error.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @param p_unlabeledData unlabeled instances handed to the NewTree
 * transduction step (copied defensively below)
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Has user asked to represent copies using weights?
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }

    // get fresh Instances objects so the caller's datasets are not mutated
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);

    super.buildClassifier(m_data);

    // OOB estimation requires full-size bags so every instance has a
    // chance of being left out of each bag
    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    m_random = new Random(m_Seed);

    // m_inBag[j][i] should flag whether instance i is in classifier j's bag.
    // NOTE(review): only the outer array is allocated here -- confirm the
    // inner arrays are filled during buildClassifiers(), otherwise the OOB
    // loop below NPEs on m_inBag[j][i].
    m_inBag = null;
    if (m_CalcOutOfBag)
        m_inBag = new boolean[m_Classifiers.length][];

    // give every randomizable base classifier its own seed
    for (int j = 0; j < m_Classifiers.length; j++) {
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt());
        }
    }
    //Insert oracle loop here TODO

    buildClassifiers();
    // transduction/induction pass: each NewTree refills 'inst' with its
    // transducted instances and re-induces itself on them
    Instances inst = new Instances(m_data);
    for (int i = 0; i < m_Classifiers.length; i++) {
        inst.clear();
        ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst);
        ((NewTree) m_Classifiers[i]).DoInduction(inst);
    }
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = m_data.classAttribute().isNumeric();

        for (int i = 0; i < m_data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[m_data.numClasses()];

            // determine predictions for instance, using only the
            // classifiers whose bag did NOT contain it
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (m_inBag[j][i])
                    continue;

                if (numeric) {
                    double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i));
                    if (!Utils.isMissingValue(pred)) {
                        votes[0] += pred;
                        voteCount++;
                    }
                } else {
                    voteCount++;
                    double[] newProbs = ((NewTree) m_Classifiers[j])
                            .distributionForInstance(m_data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote": mean prediction for regression, majority class otherwise
            if (numeric) {
                if (voteCount == 0) {
                    vote = Utils.missingValue();
                } else {
                    vote = votes[0] / voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    vote = Utils.missingValue();
                } else {
                    vote = Utils.maxIndex(votes); // predicted class
                    Utils.normalize(votes);
                }
            }

            // error for instance (weighted)
            if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) {
                outOfBagCount += m_data.instance(i).weight();
                if (numeric) {
                    // NOTE(review): dividing by classValue() makes this a
                    // *relative* absolute error, unlike standard Bagging,
                    // and divides by zero when the class value is 0 -- confirm
                    errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue())
                            * m_data.instance(i).weight()) / m_data.instance(i).classValue();
                } else {
                    if (vote != m_data.instance(i).classValue())
                        errorSum += m_data.instance(i).weight();
                }
            }
        }

        if (outOfBagCount > 0) {
            m_OutOfBagError = errorSum / outOfBagCount;
        }
    } else {
        m_OutOfBagError = 0;
    }

    // save memory
    m_data = null;
}

From source file:Bilbo.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance./*from  w  ww.jav  a2s.  co m*/
 *
 * @param instance the instance to be classified
 * @return preedicted class probability distribution
 * @throws Exception if distribution can't be computed successfully 
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    double[] sums = new double[instance.numClasses()], newProbs;

    double numPreds = 0;
    for (int i = 0; i < m_NumIterations; i++) {
        if (instance.classAttribute().isNumeric() == true) {
            double pred = ((NewTree) m_Classifiers[i]).classifyInstance(instance);
            if (!Utils.isMissingValue(pred)) {
                sums[0] += pred;
                numPreds++;
            }
        } else {
            newProbs = ((NewTree) m_Classifiers[i]).distributionForInstance(instance);
            for (int j = 0; j < newProbs.length; j++)
                sums[j] += newProbs[j];
        }
    }
    if (instance.classAttribute().isNumeric() == true) {
        if (numPreds == 0) {
            sums[0] = Utils.missingValue();
        } else {
            sums[0] /= numPreds;
        }
        return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
        return sums;
    } else {
        Utils.normalize(sums);
        return sums;
    }
}

From source file:PrincipalComponents.java

License:Open Source License

/**
 * Builds the PCA attribute constructor: filters the training data
 * (missing-value replacement, nominal-to-binary), removes useless
 * columns and the class column, computes the eigen-decomposition of the
 * correlation matrix, and prepares the transformed output format
 * (optionally including the back-transformation to the original space).
 *
 * @param data the training instances
 * @throws Exception if the transformation cannot be built
 */
private void buildAttributeConstructor(Instances data) throws Exception {
    // reset all state left over from a previous build
    m_eigenvalues = null;
    m_outputNumAtts = -1;
    m_attributeFilter = null;
    m_nominalToBinFilter = null;
    m_sumOfEigenValues = 0.0;
    m_trainInstances = new Instances(data);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_trainHeader = new Instances(m_trainInstances, 0);

    m_replaceMissingFilter = new ReplaceMissingValues();
    m_replaceMissingFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter);

    /*
     * normalization step intentionally disabled:
     * if (m_normalize) { m_normalizeFilter = new Normalize();
     * m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances
     * = Filter.useFilter(m_trainInstances, m_normalizeFilter); }
     */

    m_nominalToBinFilter = new NominalToBinary();
    m_nominalToBinFilter.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter);

    // delete any attributes with only one distinct value or are all missing
    Vector<Integer> deleteCols = new Vector<Integer>();
    for (int i = 0; i < m_trainInstances.numAttributes(); i++) {
        if (m_trainInstances.numDistinctValues(i) <= 1) {
            deleteCols.addElement(new Integer(i));
        }
    }

    if (m_trainInstances.classIndex() >= 0) {
        // get rid of the class column
        m_hasClass = true;
        m_classIndex = m_trainInstances.classIndex();
        deleteCols.addElement(new Integer(m_classIndex));
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
        m_attributeFilter = new Remove();
        int[] todelete = new int[deleteCols.size()];
        for (int i = 0; i < deleteCols.size(); i++) {
            todelete[i] = (deleteCols.elementAt(i)).intValue();
        }
        m_attributeFilter.setAttributeIndicesArray(todelete);
        m_attributeFilter.setInvertSelection(false);
        m_attributeFilter.setInputFormat(m_trainInstances);
        m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);
    }

    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_trainInstances);

    m_numInstances = m_trainInstances.numInstances();
    m_numAttribs = m_trainInstances.numAttributes();

    fillCovariance();

    // symmetric eigen-decomposition of m_correlation (MTJ library)
    SymmDenseEVD evd = SymmDenseEVD.factorize(m_correlation);

    m_eigenvectors = Matrices.getArray(evd.getEigenvectors());
    m_eigenvalues = evd.getEigenvalues();

    // any eigenvalues less than 0 are not worth anything --- change to 0
    // (small negative values can arise from numerical error)
    for (int i = 0; i < m_eigenvalues.length; i++) {
        if (m_eigenvalues[i] < 0) {
            m_eigenvalues[i] = 0.0;
        }
    }
    m_sortedEigens = Utils.sort(m_eigenvalues);
    m_sumOfEigenValues = Utils.sum(m_eigenvalues);

    m_transformedFormat = setOutputFormat();
    if (m_transBackToOriginal) {
        m_originalSpaceFormat = setOutputFormatOriginal();

        // new ordered eigenvector matrix
        int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes()
                : m_transformedFormat.numAttributes() - 1;

        double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1];

        // try converting back to the original space: take the numVectors
        // largest eigenvalues (m_sortedEigens is ascending) in order
        for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {
            for (int j = 0; j < m_numAttribs; j++) {
                orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]];
            }
        }

        // transpose the matrix
        int nr = orderedVectors.length;
        int nc = orderedVectors[0].length;
        m_eTranspose = new double[nc][nr];
        for (int i = 0; i < nc; i++) {
            for (int j = 0; j < nr; j++) {
                m_eTranspose[i][j] = orderedVectors[j][i];
            }
        }
    }
}

From source file:BaggingImprove.java

/**
 * Bagging method: draws a bootstrap sample for every base classifier,
 * logs each bootstrap to "Bootstrap.txt", trains the ensemble and, if
 * requested, estimates the out-of-bag error.
 *
 * @param data the training data to be used for generating the bagged
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // work on a copy so the caller's dataset is not modified
    data = new Instances(data);
    //data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    System.out.println("Classifier length" + m_Classifiers.length);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    System.out.println("Bag Size " + bagSize);

    Random random = new Random(m_Seed);

    // inBag[j][i] == true iff instance i was drawn into classifier j's bag
    boolean[][] inBag = null;
    if (m_CalcOutOfBag) {
        inBag = new boolean[m_Classifiers.length][];
    }

    // FIX: try-with-resources so the bootstrap log is closed even when a
    // base classifier throws during training (the writer used to leak)
    try (BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt"))) {

        for (int j = 0; j < m_Classifiers.length; j++) {

            Instances bagData = null;

            // create the in-bag dataset
            if (m_CalcOutOfBag) {
                inBag[j] = new boolean[data.numInstances()];
                bagData = data.resampleWithWeights(random, inBag[j]);
            } else {
                System.out.println("Not m_Calc out of bag");
                System.out.println("Please configure code inside!");

                bagData = data.resampleWithWeights(random);
                if (bagSize < data.numInstances()) {
                    bagData.randomize(random);
                    Instances newBagData = new Instances(bagData, 0, bagSize);
                    bagData = newBagData;
                }
            }

            // give randomizable base classifiers their own seed
            if (m_Classifier instanceof Randomizable) {
                System.out.println("Randomizable");
                ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
            }

            // write bootstrap into file
            writer.write("Bootstrap " + j);
            writer.newLine();
            writer.write(bagData.toString());
            writer.newLine();

            System.out.println("Berhasil menyimpan bootstrap ke file ");

            System.out.println("Bootstrap " + j + 1);

            // debug dump of the bag (note: starts at 1, instance 0 skipped)
            for (int b = 1; b < bagData.numInstances(); b++) {
                System.out.println("" + bagData.instance(b));
            }

            // build the classifier
            m_Classifiers[j].buildClassifier(bagData);
        }

        writer.flush();
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric) {
                votes = new double[1];
            } else {
                votes = new double[data.numClasses()];
            }

            // determine predictions for instance, using only the
            // classifiers whose bag did NOT contain it
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i]) {
                    continue;
                }
                voteCount++;
                if (numeric) {
                    // FIX: accumulate (was "votes[0] = ..."), otherwise only
                    // the last out-of-bag classifier's prediction survived
                    // and the average below was wrong
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }
            System.out.println("Vote count %d" + voteCount);

            // "vote": mean prediction for regression, majority class otherwise
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
                System.out.println("Vote " + vote);
            }

            // error for instance (weighted)
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else if (vote != data.instance(i).classValue()) {
                System.out.println("Vote terakhir" + data.instance(i).classValue());
                errorSum += data.instance(i).weight();
            }
        }

        // FIX: guard the division -- with an empty dataset outOfBagCount
        // is 0 and the original produced NaN
        m_OutOfBagError = (outOfBagCount > 0) ? errorSum / outOfBagCount : 0;
    } else {
        m_OutOfBagError = 0;
    }
}

From source file:BaggingImprove.java

/**
 * Calculates the class membership probabilities for the given test
 * instance./*from   w  w w .  j  ava  2s .  c o  m*/
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    double[] sums = new double[instance.numClasses()], newProbs;
    //-
    //System.out.println("\nDistribution For Instance\n");
    for (int i = 0; i < m_NumIterations; i++) {
        if (instance.classAttribute().isNumeric() == true) {
            //System.out.println(m_Classifiers[i].classifyInstance(instance));
            sums[0] += m_Classifiers[i].classifyInstance(instance);
        } else {
            //System.out.println(m_Classifiers[i].distributionForInstance(instance));
            newProbs = m_Classifiers[i].distributionForInstance(instance);
            //-
            //                for (int j = 0; j < newProbs.length; j++) {
            //                    sums[j] += newProbs[j];
            //                    System.out.println("Sums "+sums[j]);
            //                }
            //+

        }
    }
    if (instance.classAttribute().isNumeric() == true) {
        sums[0] /= m_NumIterations;
        return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
        return sums;
    } else {
        Utils.normalize(sums);
        return sums;
    }
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Returns the class distribution for an instance. With a single base
 * classifier the call is delegated directly; otherwise per-class
 * probabilities are assembled either by 1-vs-1 voting (or pairwise
 * coupling) or by error-correcting-code style accumulation, then
 * normalized, falling back to ZeroR when no model produced support.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance inst) throws Exception {

    // single model: nothing to combine
    if (m_Classifiers.length == 1) {
        return m_Classifiers[0].distributionForInstance(inst);
    }

    double[] probs = new double[inst.numClasses()];

    if (m_Method == METHOD_1_AGAINST_1) {
        // r = pairwise probability estimates, n = weight of each pairing
        double[][] r = new double[inst.numClasses()][inst.numClasses()];
        double[][] n = new double[inst.numClasses()][inst.numClasses()];

        for (int i = 0; i < m_ClassFilters.length; i++) {
            if (m_Classifiers[i] != null) {
                Instance tempInst = (Instance) inst.copy();
                tempInst.setDataset(m_TwoClassDataset);
                double[] current = m_Classifiers[i].distributionForInstance(tempInst);
                // recover which pair of original classes this model separates
                Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
                range.setUpper(m_ClassAttribute.numValues());
                int[] pair = range.getSelection();
                if (m_pairwiseCoupling && inst.numClasses() > 2) {
                    r[pair[0]][pair[1]] = current[0];
                    n[pair[0]][pair[1]] = m_SumOfWeights[i];
                } else {
                    // simple voting: the winning side of the pair gets one vote
                    if (current[0] > current[1]) {
                        probs[pair[0]] += 1.0;
                    } else {
                        probs[pair[1]] += 1.0;
                    }
                }
            }
        }
        if (m_pairwiseCoupling && inst.numClasses() > 2) {
            return pairwiseCoupling(n, r);
        }
    } else {
        // error correcting style methods
        for (int i = 0; i < m_ClassFilters.length; i++) {
            m_ClassFilters[i].input(inst);
            m_ClassFilters[i].batchFinished();
            double[] current = m_Classifiers[i].distributionForInstance(m_ClassFilters[i].output());

            // credit each class covered by this model's indicator range with
            // current[1], every other class with current[0]
            for (int j = 0; j < m_ClassAttribute.numValues(); j++) {
                if (((MakeIndicator) m_ClassFilters[i]).getValueRange().isInRange(j)) {
                    probs[j] += current[1];
                } else {
                    probs[j] += current[0];
                }
            }
        }
    }

    // normalize; fall back to ZeroR when no model produced any support
    if (Utils.gr(Utils.sum(probs), 0)) {
        Utils.normalize(probs);
        return probs;
    } else {
        return m_ZeroR.distributionForInstance(inst);
    }
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * Returns a string describing this clusterer. Column widths for the
 * centroid table are computed first; then the header, cluster sizes and
 * one row per attribute are rendered (plus std-dev / nominal-frequency
 * rows when m_displayStdDevs is set).
 *
 * @return a description of the clusterer as a string
 */
@Override
public String toString() {
    if (m_ClusterCentroids == null) {
        return "No clusterer built yet!";
    }

    // pass 1: widest attribute name and widest numeric centroid value
    int maxWidth = 0;
    int maxAttWidth = 0;
    boolean containsNumeric = false;
    for (int i = 0; i < m_NumClusters; i++) {
        for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
            if (m_ClusterCentroids.attribute(j).name().length() > maxAttWidth) {
                maxAttWidth = m_ClusterCentroids.attribute(j).name().length();
            }
            if (m_ClusterCentroids.attribute(j).isNumeric()) {
                containsNumeric = true;
                // digits before the decimal point, via log10
                double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) / Math.log(10.0);
                if (width < 0) {
                    width = 1;
                }
                // decimal + # decimal places + 1
                width += 6.0;
                if ((int) width > maxWidth) {
                    maxWidth = (int) width;
                }
            }
        }
    }

    // pass 2: widen for nominal labels (values and attribute columns)
    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            Attribute a = m_ClusterCentroids.attribute(i);
            for (int j = 0; j < m_ClusterCentroids.numInstances(); j++) {
                String val = a.value((int) m_ClusterCentroids.instance(j).value(i));
                if (val.length() > maxWidth) {
                    maxWidth = val.length();
                }
            }
            for (int j = 0; j < a.numValues(); j++) {
                String val = a.value(j) + " ";
                if (val.length() > maxAttWidth) {
                    maxAttWidth = val.length();
                }
            }
        }
    }

    if (m_displayStdDevs) {
        // check for maximum width of maximum frequency count
        for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                int maxV = Utils.maxIndex(m_FullNominalCounts[i]);
                int percent = 6; // max percent width (100%)
                String nomV = "" + m_FullNominalCounts[i][maxV];
                if (nomV.length() + percent > maxWidth) {
                    maxWidth = nomV.length() + 1;
                }
            }
        }
    }

    // check for size of cluster sizes
    for (int m_ClusterSize : m_ClusterSizes) {
        String size = "(" + m_ClusterSize + ")";
        if (size.length() > maxWidth) {
            maxWidth = size.length();
        }
    }

    if (m_displayStdDevs && maxAttWidth < "missing".length()) {
        maxAttWidth = "missing".length();
    }

    // final width adjustments for the fixed header labels
    String plusMinus = "+/-";
    maxAttWidth += 2;
    if (m_displayStdDevs && containsNumeric) {
        maxWidth += plusMinus.length();
    }
    if (maxAttWidth < "Attribute".length() + 2) {
        maxAttWidth = "Attribute".length() + 2;
    }

    if (maxWidth < "Full Data".length()) {
        maxWidth = "Full Data".length() + 1;
    }

    if (maxWidth < "missing".length()) {
        maxWidth = "missing".length() + 1;
    }

    StringBuffer temp = new StringBuffer();

    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + m_Iterations + "\n");

    // label the error total according to the distance function in use
    if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir
            || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir
            || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir
            || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir) {
        temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));
    } else {
        temp.append("Sum of within cluster distances: " + Utils.sum(m_squaredErrors));
    }

    if (!m_dontReplaceMissing) {
        temp.append("\nMissing values globally replaced with mean/mode");
    }

    // table header
    temp.append("\n\nCluster centroids:\n");
    temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true));

    temp.append("\n");
    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));

    temp.append(pad("Full Data", " ", maxWidth + 1 - "Full Data".length(), true));

    // cluster numbers
    for (int i = 0; i < m_NumClusters; i++) {
        String clustNum = "" + i;
        temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true));
    }
    temp.append("\n");

    // cluster sizes
    String cSize = "(" + Utils.sum(m_ClusterSizes) + ")";
    temp.append(pad(cSize, " ", maxAttWidth + maxWidth + 1 - cSize.length(), true));
    for (int i = 0; i < m_NumClusters; i++) {
        cSize = "(" + m_ClusterSizes[i] + ")";
        temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true));
    }
    temp.append("\n");

    // separator line
    temp.append(pad("", "=", maxAttWidth
            + (maxWidth * (m_ClusterCentroids.numInstances() + 1) + m_ClusterCentroids.numInstances() + 1),
            true));
    temp.append("\n");

    // one row per attribute: full-data value, then each cluster's value
    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        String attName = m_ClusterCentroids.attribute(i).name();
        temp.append(attName);
        for (int j = 0; j < maxAttWidth - attName.length(); j++) {
            temp.append(" ");
        }

        String strVal;
        String valMeanMode;
        // full data
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            if (m_FullMeansOrMediansOrModes[i] == -1) { // missing
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = m_ClusterCentroids.attribute(i).value((int) m_FullMeansOrMediansOrModes[i])),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        } else {
            if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = Utils.doubleToString(m_FullMeansOrMediansOrModes[i], maxWidth, 4).trim()),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        }
        temp.append(valMeanMode);

        for (int j = 0; j < m_NumClusters; j++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad(
                            (strVal = m_ClusterCentroids.attribute(i)
                                    .value((int) m_ClusterCentroids.instance(j).value(i))),
                            " ", maxWidth + 1 - strVal.length(), true);
                }
            } else {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad((strVal = Utils
                            .doubleToString(m_ClusterCentroids.instance(j).value(i), maxWidth, 4).trim()), " ",
                            maxWidth + 1 - strVal.length(), true);
                }
            }
            temp.append(valMeanMode);
        }
        temp.append("\n");

        if (m_displayStdDevs) {
            // Std devs/max nominal
            String stdDevVal = "";

            if (m_ClusterCentroids.attribute(i).isNominal()) {
                // Do the values of the nominal attribute
                Attribute a = m_ClusterCentroids.attribute(i);
                for (int j = 0; j < a.numValues(); j++) {
                    // full data
                    String val = "  " + a.value(j);
                    temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false));
                    int count = m_FullNominalCounts[i][j];
                    int percent = (int) ((double) m_FullNominalCounts[i][j] / Utils.sum(m_ClusterSizes)
                            * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterNominalCounts[k][i][j];
                        percent = (int) ((double) m_ClusterNominalCounts[k][i][j] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }
                    temp.append("\n");
                }
                // missing (if any)
                if (m_FullMissingCounts[i] > 0) {
                    // Full data
                    temp.append(pad("  missing", " ", maxAttWidth + 1 - "  missing".length(), false));
                    int count = m_FullMissingCounts[i];
                    int percent = (int) ((double) m_FullMissingCounts[i] / Utils.sum(m_ClusterSizes) * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterMissingCounts[k][i];
                        percent = (int) ((double) m_ClusterMissingCounts[k][i] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }

                    temp.append("\n");
                }

                temp.append("\n");
            } else {
                // Full data
                if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                    stdDevVal = pad("--", " ", maxAttWidth + maxWidth + 1 - 2, true);
                } else {
                    stdDevVal = pad(
                            (strVal = plusMinus + Utils.doubleToString(m_FullStdDevs[i], maxWidth, 4).trim()),
                            " ", maxWidth + maxAttWidth + 1 - strVal.length(), true);
                }
                temp.append(stdDevVal);

                // Clusters
                for (int j = 0; j < m_NumClusters; j++) {
                    if (m_ClusterCentroids.instance(j).isMissing(i)) {
                        stdDevVal = pad("--", " ", maxWidth + 1 - 2, true);
                    } else {
                        stdDevVal = pad((strVal = plusMinus + Utils
                                .doubleToString(m_ClusterStdDevs.instance(j).value(i), maxWidth, 4).trim()),
                                " ", maxWidth + 1 - strVal.length(), true);
                    }
                    temp.append(stdDevVal);
                }
                temp.append("\n\n");
            }
        }
    }

    temp.append("\n\n");
    return temp.toString();
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * Gets the squared error summed over all clusters.
 *
 * @return the total within-cluster squared error
 */
public double getSquaredError() {
    final double totalSquaredError = Utils.sum(m_squaredErrors);
    return totalSquaredError;
}

From source file:aw_cluster.myKMeans.java

@Override
public String toString() {
    // Guard: clustering has not been performed yet.
    if (centroid == null) {
        return "No clusterer built yet!";
    }

    // --- Pass 1: compute column widths for the fixed-width table. ---
    // maxAttWidth = widest row label (attribute name);
    // maxWidth    = widest value cell (numeric value, nominal label, or size).
    int maxWidth = 0;
    int maxAttWidth = 0;
    boolean containsNumeric = false; // NOTE(review): computed but never read below
    for (int i = 0; i < numCluster; i++) {
        for (int j = 0; j < centroid.numAttributes(); j++) {
            if (centroid.attribute(j).name().length() > maxAttWidth) {
                maxAttWidth = centroid.attribute(j).name().length();
            }
            if (centroid.attribute(j).isNumeric()) {
                containsNumeric = true;
                // Approximate digit count before the decimal point via log10.
                double width = Math.log(Math.abs(centroid.instance(i).value(j))) / Math.log(10.0);
                if (width < 0) {
                    width = 1;
                }
                // Leave room for sign, decimal point and 4 decimal places.
                width += 6.0;
                if ((int) width > maxWidth) {
                    maxWidth = (int) width;
                }
            }
        }
    }

    // Nominal attributes: widen the columns to fit the longest category label.
    for (int i = 0; i < centroid.numAttributes(); i++) {
        if (centroid.attribute(i).isNominal()) {
            Attribute a = centroid.attribute(i);
            for (int j = 0; j < centroid.numInstances(); j++) {
                String val = a.value((int) centroid.instance(j).value(i));
                if (val.length() > maxWidth) {
                    maxWidth = val.length();
                }
            }
            for (int j = 0; j < a.numValues(); j++) {
                String val = a.value(j) + " ";
                if (val.length() > maxAttWidth) {
                    maxAttWidth = val.length();
                }
            }
        }
    }

    // check for size of cluster sizes
    for (int i = 0; i < sizeEachCluster.length; i++) {
        String size = "(" + sizeEachCluster[i] + ")";
        if (size.length() > maxWidth) {
            maxWidth = size.length();
        }
    }

    // Enforce minimum widths so the fixed headings always fit.
    String plusMinus = "+/-"; // NOTE(review): assigned but never used in this method
    maxAttWidth += 2;
    if (maxAttWidth < "Attribute".length() + 2) {
        maxAttWidth = "Attribute".length() + 2;
    }

    if (maxWidth < "Full Data".length()) {
        maxWidth = "Full Data".length() + 1;
    }

    if (maxWidth < "missing".length()) {
        maxWidth = "missing".length() + 1;
    }

    // --- Pass 2: emit the report. ---
    StringBuffer temp = new StringBuffer();
    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + numIteration + "\n");

    // Label the error total according to the distance function in use.
    if (distanceFunction instanceof EuclideanDistance) {
        temp.append("Within cluster sum of squared errors: " + Utils.sum(squaredError));
    } else {
        temp.append("Sum of within cluster distances: " + Utils.sum(squaredError));
    }

    // Header rows: "Cluster#" banner, then column numbers and cluster sizes.
    temp.append("\n\nCluster centroid:\n");
    temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true));

    temp.append("\n");
    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));

    // cluster numbers
    for (int i = 0; i < numCluster; i++) {
        String clustNum = "" + i;
        temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true));
    }
    temp.append("\n");

    // cluster sizes
    String cSize = "";
    temp.append(pad(cSize, " ", maxAttWidth - cSize.length(), true));
    for (int i = 0; i < numCluster; i++) {
        cSize = "(" + sizeEachCluster[i] + ")";
        temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true));
    }
    temp.append("\n");

    // Separator line of '='. NOTE(review): width uses centroid.numInstances(),
    // presumably equal to numCluster (one centroid per cluster) — confirm.
    temp.append(
            pad("", "=", maxAttWidth + (maxWidth * (centroid.numInstances()) + centroid.numInstances()), true));
    temp.append("\n");

    // One row per attribute: the centroid value (mean or mode) for each cluster.
    for (int i = 0; i < centroid.numAttributes(); i++) {
        String attName = centroid.attribute(i).name();
        temp.append(attName);
        for (int j = 0; j < maxAttWidth - attName.length(); j++) {
            temp.append(" ");
        }

        String strVal;
        String valMeanMode;

        for (int j = 0; j < numCluster; j++) {
            if (centroid.attribute(i).isNominal()) {
                if (centroid.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    // Nominal: print the category label of the centroid's mode.
                    valMeanMode = pad(
                            (strVal = centroid.attribute(i).value((int) centroid.instance(j).value(i))), " ",
                            maxWidth + 1 - strVal.length(), true);
                }
            } else {
                if (centroid.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    // Numeric: print the mean, rounded to 4 decimal places.
                    valMeanMode = pad(
                            (strVal = Utils.doubleToString(centroid.instance(j).value(i), maxWidth, 4).trim()),
                            " ", maxWidth + 1 - strVal.length(), true);
                }
            }
            temp.append(valMeanMode);
        }
        temp.append("\n");
    }

    temp.append("\n\n");
    return temp.toString();
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Generates the classifier./* w w w  .  j  a  va2 s  .c  o m*/
 *
 * @param instances set of instances serving as training data 
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal;
    double bestPoint = -Double.MAX_VALUE;
    int bestAtt = -1, numClasses;

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(instances);
        return;
    } else {
        m_ZeroR = null;
    }

    double[][] bestDist = new double[3][instances.numClasses()];

    m_Instances = new Instances(instances);

    if (m_Instances.classAttribute().isNominal()) {
        numClasses = m_Instances.numClasses();
    } else {
        numClasses = 1;
    }

    // For each attribute
    boolean first = true;
    for (int i = 0; i < m_Instances.numAttributes(); i++) {
        if (i != m_Instances.classIndex()) {

            // Reserve space for distribution.
            m_Distribution = new double[3][numClasses];

            // Compute value of criterion for best split on attribute
            if (m_Instances.attribute(i).isNominal()) {
                currVal = findSplitNominal(i);
            } else {
                currVal = findSplitNumeric(i);
            }
            if ((first) || (currVal < bestVal)) {
                bestVal = currVal;
                bestAtt = i;
                bestPoint = m_SplitPoint;
                for (int j = 0; j < 3; j++) {
                    System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, numClasses);
                }
            }

            // First attribute has been investigated
            first = false;
        }
    }

    // Set attribute, split point and distribution.
    m_AttIndex = bestAtt;
    m_SplitPoint = bestPoint;
    m_Distribution = bestDist;
    if (m_Instances.classAttribute().isNominal()) {
        for (int i = 0; i < m_Distribution.length; i++) {
            double sumCounts = Utils.sum(m_Distribution[i]);
            if (sumCounts == 0) { // This means there were only missing attribute values
                System.arraycopy(m_Distribution[2], 0, m_Distribution[i], 0, m_Distribution[2].length);
                Utils.normalize(m_Distribution[i]);
            } else {
                Utils.normalize(m_Distribution[i], sumCounts);
            }
        }
    }

    // Save memory
    m_Instances = new Instances(m_Instances, 0);
}