Example usage for weka.core Attribute numValues

List of usage examples for weka.core Attribute numValues

Introduction

This page collects usage examples for the weka.core.Attribute.numValues() method.

Prototype

public final int numValues()

Source Link

Document

Returns the number of attribute values.

Usage

From source file:GrowTree.java

/**
 * Selects the attribute whose value-based partition of {@code D} contains
 * the subset with the lowest score.
 * <p>
 * For every attribute returned by {@code D.enumerateAttributes()} the
 * instances are distributed into one subset per attribute value, each subset
 * is scored with {@code imp(...)} (defined elsewhere; presumably an impurity
 * measure), and the attribute owning the lowest score seen so far is
 * remembered. Only scores strictly below the initial threshold of 1.0 are
 * accepted.
 *
 * @param D the instances to split
 * @return the attribute owning the lowest-scoring subset, or {@code null} if
 *         no subset scored below 1.0
 */
Attribute bestSplit(Instances D) {
    double imin = 1.0;
    Attribute fbest = null;
    Enumeration enat = D.enumerateAttributes();
    while (enat.hasMoreElements()) {
        Attribute a = (Attribute) enat.nextElement();
        int numValues = a.numValues();
        if (numValues == 0) {
            // No value slots to split on: the subset array would be empty and
            // the first instance would index out of bounds.
            continue;
        }

        // split D into one subset per value of the attribute
        Instances[] split = new Instances[numValues];
        for (int i = 0; i < numValues; i++) {
            split[i] = new Instances(D, D.numInstances());
        }
        Enumeration x = D.enumerateInstances();
        while (x.hasMoreElements()) {
            Instance in = (Instance) x.nextElement();
            if (in.isMissing(a)) {
                // Weka encodes a missing value as NaN; casting it to int would
                // silently count the instance under value index 0.
                continue;
            }
            split[(int) in.value(a)].add(in);
        }

        for (Instances subset : split) {
            subset.compactify();
            // score each subset once and reuse the result
            double impurity = imp(subset);
            if (impurity < imin) {
                imin = impurity;
                fbest = a; // best feature so far to make root
            }
        }
    }
    return fbest;

}

From source file:ID3Chi.java

License:Open Source License

/**
 * Computes Chi-Square function for an attribute.
 *
 * @param data/*from   www .j av a 2 s .  c  o m*/
 *            the data for which info gain is to be computed
 * @param att
 *            the attribute
 * @return the chi-square for the given attribute and data
 * @throws Exception
 *             if computation fails
 */
private double computeChiSquare(Instances data, Attribute att) throws Exception {

    double chiSquare = 0;
    double[] classCounts = GetClassCounts(data);
    Instances[] subset = splitData(data, att);
    for (int j = 0; j < att.numValues(); j++) {
        if (subset[j].numInstances() > 0) {
            chiSquare += computeChiSquareForSubset(subset[j], att, classCounts, data.numInstances());
        }
    }
    return chiSquare;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Computes the information gain of an attribute, spreading the instances
 * with an unknown value for that attribute proportionally over the known
 * value subsets.
 *
 * @param data
 *            the data for which info gain is to be computed
 * @param att
 *            the attribute
 * @param entropyOfAllData
 *            entropy of data set
 * @return the information gain for the given attribute and data; 0 if every
 *         instance has an unknown value for the attribute
 * @throws Exception
 *             if computation fails
 */
private double computeInfoGain(Instances data, Attribute att, double entropyOfAllData) throws Exception {

    final Instances[] partitions = splitData(data, att);

    // the slot at index numValues() holds the instances with unknown value
    final Instances unknowns = partitions[att.numValues()];
    final int unknownCount = unknowns.numInstances();
    if (unknownCount == data.numInstances()) {
        return 0;
    }

    final double[] unknownClassCounts = GetClassCounts(unknowns);

    double gain = entropyOfAllData;
    for (int v = 0; v < att.numValues(); v++) {
        final Instances partition = partitions[v];
        if (partition.numInstances() <= 0) {
            continue;
        }
        // fraction of all instances that carry this attribute value
        final double share = (double) partition.numInstances() / (double) data.numInstances();
        // subset size plus its proportional part of the unknowns, relative to the whole
        final double weight = ((double) partition.numInstances() + (double) unknownCount * share)
                / (double) data.numInstances();
        gain -= weight * computeEntropyWithUnknowns(partition, unknowns, unknownClassCounts, share);
    }
    return gain;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Partitions a dataset by the values of a nominal attribute. The returned
 * array has one entry per attribute value plus a final entry (at index
 * {@code att.numValues()}) collecting the instances whose value is missing.
 *
 * @param data
 *            the data which is to be split
 * @param att
 *            the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {

    // last slot is reserved for "unknown" values
    final int unknownSlot = att.numValues();
    final Instances[] buckets = new Instances[unknownSlot + 1];
    for (int b = 0; b < buckets.length; b++) {
        buckets[b] = new Instances(data, data.numInstances());
    }

    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        final Instance row = (Instance) rows.nextElement();
        final int target = row.isMissing(att) ? unknownSlot : (int) row.value(att);
        buckets[target].add(row);
    }

    // trim every bucket down to the instances it actually received
    for (Instances bucket : buckets) {
        bucket.compactify();
    }
    return buckets;
}

From source file:adams.flow.sink.WekaCostBenefitAnalysis.java

License:Open Source License

/**
 * Plots the token (the panel and dialog have already been created at
 * this stage).
 * <p>
 * The payload is either a {@code WekaEvaluationContainer} or an
 * {@code Evaluation}. A threshold curve is computed for the class value
 * selected via {@code m_ClassIndex} and handed to the cost/benefit panel.
 * Nothing is plotted when the evaluation holds no predictions or no curve
 * could be generated; any exception is passed to {@code handleException}.
 *
 * @param token   the token to display
 */
@Override
protected void display(Token token) {
    Evaluation eval;
    Attribute classAtt;
    Attribute classAttToUse;
    int classValue;
    ThresholdCurve tc;
    Instances result;
    ArrayList<String> newNames;
    PlotData2D tempd;
    boolean[] cp;
    int n;

    try {
        if (token.getPayload() instanceof WekaEvaluationContainer)
            eval = (Evaluation) ((WekaEvaluationContainer) token.getPayload())
                    .getValue(WekaEvaluationContainer.VALUE_EVALUATION);
        else
            eval = (Evaluation) token.getPayload();
        if (eval.predictions() == null) {
            getLogger().severe("No predictions available from Evaluation object!");
            return;
        }
        classAtt = eval.getHeader().classAttribute();
        m_ClassIndex.setData(classAtt);
        classValue = m_ClassIndex.getIntIndex();
        tc = new ThresholdCurve();
        result = tc.getCurve(eval.predictions(), classValue);
        if (result == null) {
            // getCurve yields no data when it cannot build a curve; report
            // that instead of failing later with a NullPointerException
            getLogger().severe("No threshold curve could be generated from the predictions!");
            return;
        }

        // Create a dummy class attribute with the chosen
        // class value as index 0 (if necessary).
        classAttToUse = eval.getHeader().classAttribute();
        if (classValue != 0) {
            newNames = new ArrayList<>();
            newNames.add(classAtt.value(classValue));
            for (int k = 0; k < classAtt.numValues(); k++) {
                if (k != classValue)
                    newNames.add(classAtt.value(k));
            }
            classAttToUse = new Attribute(classAtt.name(), newNames);
        }
        // assemble plot data
        tempd = new PlotData2D(result);
        tempd.setPlotName(result.relationName());
        tempd.m_alwaysDisplayPointsOfThisSize = 10;
        // specify which points are connected: every point except the first
        // is joined to its predecessor
        cp = new boolean[result.numInstances()];
        for (n = 1; n < cp.length; n++)
            cp[n] = true;
        tempd.setConnectPoints(cp);
        // add plot
        m_CostBenefitPanel.setCurveData(tempd, classAttToUse);
    } catch (Exception e) {
        handleException("Failed to display token: " + token, e);
    }
}

From source file:adams.flow.sink.WekaCostBenefitAnalysis.java

License:Open Source License

/**
 * Creates a new panel for the token.
 * <p>
 * The panel wraps a {@code CostBenefitAnalysis} component. When a token is
 * supplied, the panel is named after the relation of the evaluation's header
 * and the token is displayed immediately.
 *
 * @param token   the token to display in a new panel, can be null
 * @return      the generated panel
 */
public AbstractDisplayPanel createDisplayPanel(Token token) {
    AbstractDisplayPanel result;
    String name;

    if (token != null)
        name = "Cost curve (" + getEvaluation(token).getHeader().relationName() + ")";
    else
        name = "Cost curve";

    result = new AbstractComponentDisplayPanel(name) {
        private static final long serialVersionUID = -3513994354297811163L;
        protected CostBenefitAnalysis m_VisualizePanel;

        @Override
        protected void initGUI() {
            super.initGUI();
            setLayout(new BorderLayout());
            m_VisualizePanel = new CostBenefitAnalysis();
            add(m_VisualizePanel, BorderLayout.CENTER);
        }

        /**
         * Computes the threshold curve for the selected class value and
         * shows it in the cost/benefit component. Nothing is plotted when
         * the evaluation holds no predictions or no curve could be built.
         */
        @Override
        public void display(Token token) {
            try {
                Evaluation eval = getEvaluation(token);
                // same guard as the actor's own display(Token) method
                if (eval.predictions() == null) {
                    getLogger().log(Level.SEVERE, "No predictions available from Evaluation object!");
                    return;
                }
                Attribute classAtt = eval.getHeader().classAttribute();
                m_ClassIndex.setData(classAtt);
                int classValue = m_ClassIndex.getIntIndex();
                ThresholdCurve tc = new ThresholdCurve();
                Instances curve = tc.getCurve(eval.predictions(), classValue);
                if (curve == null) {
                    // report a missing curve instead of failing later with
                    // a NullPointerException
                    getLogger().log(Level.SEVERE, "No threshold curve could be generated from the predictions!");
                    return;
                }

                // Create a dummy class attribute with the chosen
                // class value as index 0 (if necessary).
                Attribute classAttToUse = eval.getHeader().classAttribute();
                if (classValue != 0) {
                    ArrayList<String> newNames = new ArrayList<>();
                    newNames.add(classAtt.value(classValue));
                    for (int k = 0; k < classAtt.numValues(); k++) {
                        if (k != classValue)
                            newNames.add(classAtt.value(k));
                    }
                    classAttToUse = new Attribute(classAtt.name(), newNames);
                }
                // assemble plot data
                PlotData2D tempd = new PlotData2D(curve);
                tempd.setPlotName(curve.relationName());
                tempd.m_alwaysDisplayPointsOfThisSize = 10;
                // specify which points are connected: every point except
                // the first is joined to its predecessor
                boolean[] cp = new boolean[curve.numInstances()];
                for (int n = 1; n < cp.length; n++)
                    cp[n] = true;
                tempd.setConnectPoints(cp);
                // add plot
                m_VisualizePanel.setCurveData(tempd, classAttToUse);
            } catch (Exception e) {
                getLogger().log(Level.SEVERE, "Failed to display token: " + token, e);
            }
        }

        @Override
        public JComponent supplyComponent() {
            return m_VisualizePanel;
        }

        @Override
        public void clearPanel() {
            // nothing to clear
        }

        public void cleanUp() {
            // nothing to clean up
        }
    };

    if (token != null)
        result.display(token);

    return result;
}

From source file:adams.flow.transformer.WekaReorderAttributesToReference.java

License:Open Source License

/**
 * Executes the flow item: brings the attributes of the incoming
 * {@code Instance}/{@code Instances} payload into the order of the
 * reference dataset and stores the outcome in the output token.
 * <p>
 * Reference attributes that are absent from the incoming data are reported
 * as an error unless {@code m_Lenient} is set; in lenient mode they are
 * appended via {@code Add} filters before the {@code Reorder} filter runs.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result = null;

    // obtain the reference dataset lazily, if configured that way
    if (m_OnTheFly && (m_Reference == null)) {
        result = setUpReference();
        if (result != null)
            return result;
    }

    // get input data: either a single row (with its dataset) or a dataset
    final Object payload = m_InputToken.getPayload();
    final Instance instOld;
    final Instances dataOld;
    if (payload instanceof Instance) {
        instOld = (Instance) payload;
        dataOld = instOld.dataset();
    } else {
        instOld = null;
        dataOld = (Instances) payload;
    }

    // do we need to initialize filter?
    // NOTE(review): as written, the filter is rebuilt on every call while
    // m_InitializeOnce is true - confirm this is the intended meaning of the flag.
    if (m_InitializeOnce || (m_Reorder == null)) {
        // check incoming data
        if (!m_Lenient) {
            final StringBuilder missing = new StringBuilder();
            for (int i = 0; i < m_Reference.numAttributes(); i++) {
                final String refName = m_Reference.attribute(i).name();
                if (dataOld.attribute(refName) != null)
                    continue;
                if (missing.length() == 0)
                    missing.append("Missing attribute(s) in incoming data: ");
                else
                    missing.append(", ");
                missing.append(refName);
            }
            if (missing.length() > 0) {
                result = missing.toString();
                getLogger().severe(result);
            }
        }

        if (result == null) {
            try {
                // determine indices (1-based, comma-separated) in reference order
                final StringBuilder order = new StringBuilder();
                final List<Add> adds = new ArrayList<Add>();
                for (int i = 0; i < m_Reference.numAttributes(); i++) {
                    final Attribute refAtt = m_Reference.attribute(i);
                    final Attribute present = dataOld.attribute(refAtt.name());
                    final int index;
                    if (present != null) {
                        index = present.index();
                    } else {
                        // attribute gets appended at the end, after all earlier additions
                        index = dataOld.numAttributes() + adds.size();
                        final Add add = new Add();
                        add.setAttributeIndex("last");
                        add.setAttributeName(refAtt.name());
                        add.setAttributeType(new SelectedTag(refAtt.type(), Add.TAGS_TYPE));
                        if (refAtt.isNominal()) {
                            final StringBuilder labels = new StringBuilder();
                            for (int n = 0; n < refAtt.numValues(); n++) {
                                if (labels.length() > 0)
                                    labels.append(",");
                                labels.append(refAtt.value(n));
                            }
                            add.setNominalLabels(labels.toString());
                        }
                        adds.add(add);
                    }
                    if (order.length() > 0)
                        order.append(",");
                    order.append((index + 1));
                }

                // build reorder filter
                final Reorder reorder = new Reorder();
                reorder.setAttributeIndices(order.toString());

                // build multifilter: first all additions, then the reordering
                final List<Filter> filters = new ArrayList<Filter>(adds);
                filters.add(reorder);
                m_Reorder = new MultiFilter();
                m_Reorder.setFilters(filters.toArray(new Filter[filters.size()]));

                // initialize filter
                m_Reorder.setInputFormat(dataOld);
            } catch (Exception e) {
                result = handleException("Failed to initialize reorder filter!", e);
            }
        }
    }

    // reorder data
    Instances dataNew = null;
    Instance instNew = null;
    if (result == null) {
        try {
            if (instOld == null) {
                dataNew = Filter.useFilter(dataOld, m_Reorder);
                if (m_KeepRelationName)
                    dataNew.setRelationName(dataOld.relationName());
            } else {
                m_Reorder.input(instOld);
                m_Reorder.batchFinished();
                instNew = m_Reorder.output();
                if (m_KeepRelationName)
                    instNew.dataset().setRelationName(dataOld.relationName());
            }
        } catch (Exception e) {
            result = handleException("Failed to reorder data!", e);
            // discard partial results so that no token gets forwarded
            instNew = null;
            dataNew = null;
        }
    }

    if (instNew != null)
        m_OutputToken = new Token(instNew);
    else if (dataNew != null)
        m_OutputToken = new Token(dataNew);

    return result;
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * return a string describing this clusterer
 * /*www.  ja  v  a2  s. c o  m*/
 * @return a description of the clusterer as a string
 */
@Override
public String toString() {
    if (m_ClusterCentroids == null) {
        return "No clusterer built yet!";
    }

    int maxWidth = 0;
    int maxAttWidth = 0;
    boolean containsNumeric = false;
    for (int i = 0; i < m_NumClusters; i++) {
        for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
            if (m_ClusterCentroids.attribute(j).name().length() > maxAttWidth) {
                maxAttWidth = m_ClusterCentroids.attribute(j).name().length();
            }
            if (m_ClusterCentroids.attribute(j).isNumeric()) {
                containsNumeric = true;
                double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) / Math.log(10.0);
                // System.err.println(m_ClusterCentroids.instance(i).value(j)+" "+width);
                if (width < 0) {
                    width = 1;
                }
                // decimal + # decimal places + 1
                width += 6.0;
                if ((int) width > maxWidth) {
                    maxWidth = (int) width;
                }
            }
        }
    }

    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            Attribute a = m_ClusterCentroids.attribute(i);
            for (int j = 0; j < m_ClusterCentroids.numInstances(); j++) {
                String val = a.value((int) m_ClusterCentroids.instance(j).value(i));
                if (val.length() > maxWidth) {
                    maxWidth = val.length();
                }
            }
            for (int j = 0; j < a.numValues(); j++) {
                String val = a.value(j) + " ";
                if (val.length() > maxAttWidth) {
                    maxAttWidth = val.length();
                }
            }
        }
    }

    if (m_displayStdDevs) {
        // check for maximum width of maximum frequency count
        for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                int maxV = Utils.maxIndex(m_FullNominalCounts[i]);
                /*
                 * int percent = (int)((double)m_FullNominalCounts[i][maxV] /
                 * Utils.sum(m_ClusterSizes) * 100.0);
                 */
                int percent = 6; // max percent width (100%)
                String nomV = "" + m_FullNominalCounts[i][maxV];
                // + " (" + percent + "%)";
                if (nomV.length() + percent > maxWidth) {
                    maxWidth = nomV.length() + 1;
                }
            }
        }
    }

    // check for size of cluster sizes
    for (int m_ClusterSize : m_ClusterSizes) {
        String size = "(" + m_ClusterSize + ")";
        if (size.length() > maxWidth) {
            maxWidth = size.length();
        }
    }

    if (m_displayStdDevs && maxAttWidth < "missing".length()) {
        maxAttWidth = "missing".length();
    }

    String plusMinus = "+/-";
    maxAttWidth += 2;
    if (m_displayStdDevs && containsNumeric) {
        maxWidth += plusMinus.length();
    }
    if (maxAttWidth < "Attribute".length() + 2) {
        maxAttWidth = "Attribute".length() + 2;
    }

    if (maxWidth < "Full Data".length()) {
        maxWidth = "Full Data".length() + 1;
    }

    if (maxWidth < "missing".length()) {
        maxWidth = "missing".length() + 1;
    }

    StringBuffer temp = new StringBuffer();
    // String naString = "N/A";

    /*
     * for (int i = 0; i < maxWidth+2; i++) { naString += " "; }
     */
    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + m_Iterations + "\n");

    if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir
            || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir
            || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir
            || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir) {
        temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));
    } else {
        temp.append("Sum of within cluster distances: " + Utils.sum(m_squaredErrors));
    }

    if (!m_dontReplaceMissing) {
        temp.append("\nMissing values globally replaced with mean/mode");
    }

    temp.append("\n\nCluster centroids:\n");
    temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true));

    temp.append("\n");
    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));

    temp.append(pad("Full Data", " ", maxWidth + 1 - "Full Data".length(), true));

    // cluster numbers
    for (int i = 0; i < m_NumClusters; i++) {
        String clustNum = "" + i;
        temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true));
    }
    temp.append("\n");

    // cluster sizes
    String cSize = "(" + Utils.sum(m_ClusterSizes) + ")";
    temp.append(pad(cSize, " ", maxAttWidth + maxWidth + 1 - cSize.length(), true));
    for (int i = 0; i < m_NumClusters; i++) {
        cSize = "(" + m_ClusterSizes[i] + ")";
        temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true));
    }
    temp.append("\n");

    temp.append(pad("", "=", maxAttWidth
            + (maxWidth * (m_ClusterCentroids.numInstances() + 1) + m_ClusterCentroids.numInstances() + 1),
            true));
    temp.append("\n");

    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        String attName = m_ClusterCentroids.attribute(i).name();
        temp.append(attName);
        for (int j = 0; j < maxAttWidth - attName.length(); j++) {
            temp.append(" ");
        }

        String strVal;
        String valMeanMode;
        // full data
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            if (m_FullMeansOrMediansOrModes[i] == -1) { // missing
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = m_ClusterCentroids.attribute(i).value((int) m_FullMeansOrMediansOrModes[i])),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        } else {
            if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = Utils.doubleToString(m_FullMeansOrMediansOrModes[i], maxWidth, 4).trim()),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        }
        temp.append(valMeanMode);

        for (int j = 0; j < m_NumClusters; j++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad(
                            (strVal = m_ClusterCentroids.attribute(i)
                                    .value((int) m_ClusterCentroids.instance(j).value(i))),
                            " ", maxWidth + 1 - strVal.length(), true);
                }
            } else {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad((strVal = Utils
                            .doubleToString(m_ClusterCentroids.instance(j).value(i), maxWidth, 4).trim()), " ",
                            maxWidth + 1 - strVal.length(), true);
                }
            }
            temp.append(valMeanMode);
        }
        temp.append("\n");

        if (m_displayStdDevs) {
            // Std devs/max nominal
            String stdDevVal = "";

            if (m_ClusterCentroids.attribute(i).isNominal()) {
                // Do the values of the nominal attribute
                Attribute a = m_ClusterCentroids.attribute(i);
                for (int j = 0; j < a.numValues(); j++) {
                    // full data
                    String val = "  " + a.value(j);
                    temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false));
                    int count = m_FullNominalCounts[i][j];
                    int percent = (int) ((double) m_FullNominalCounts[i][j] / Utils.sum(m_ClusterSizes)
                            * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterNominalCounts[k][i][j];
                        percent = (int) ((double) m_ClusterNominalCounts[k][i][j] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }
                    temp.append("\n");
                }
                // missing (if any)
                if (m_FullMissingCounts[i] > 0) {
                    // Full data
                    temp.append(pad("  missing", " ", maxAttWidth + 1 - "  missing".length(), false));
                    int count = m_FullMissingCounts[i];
                    int percent = (int) ((double) m_FullMissingCounts[i] / Utils.sum(m_ClusterSizes) * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterMissingCounts[k][i];
                        percent = (int) ((double) m_ClusterMissingCounts[k][i] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }

                    temp.append("\n");
                }

                temp.append("\n");
            } else {
                // Full data
                if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                    stdDevVal = pad("--", " ", maxAttWidth + maxWidth + 1 - 2, true);
                } else {
                    stdDevVal = pad(
                            (strVal = plusMinus + Utils.doubleToString(m_FullStdDevs[i], maxWidth, 4).trim()),
                            " ", maxWidth + maxAttWidth + 1 - strVal.length(), true);
                }
                temp.append(stdDevVal);

                // Clusters
                for (int j = 0; j < m_NumClusters; j++) {
                    if (m_ClusterCentroids.instance(j).isMissing(i)) {
                        stdDevVal = pad("--", " ", maxWidth + 1 - 2, true);
                    } else {
                        stdDevVal = pad((strVal = plusMinus + Utils
                                .doubleToString(m_ClusterStdDevs.instance(j).value(i), maxWidth, 4).trim()),
                                " ", maxWidth + 1 - strVal.length(), true);
                    }
                    temp.append(stdDevVal);
                }
                temp.append("\n\n");
            }
        }
    }

    temp.append("\n\n");
    return temp.toString();
}

From source file:aw_cluster.myKMeans.java

/**
 * Returns a textual summary of the clusterer: iteration count, summed
 * errors or distances, and a table with one row per attribute and one
 * column per cluster centroid. Cell alignment is delegated to
 * {@code pad(...)}, which is defined elsewhere in the class.
 *
 * @return the description, or a fixed message if no centroids exist yet
 */
@Override
public String toString() {
    if (centroid == null) {
        return "No clusterer built yet!";
    }

    // ---- determine column widths ----
    int maxWidth = 0;      // width of a value column
    int maxAttWidth = 0;   // width of the leading attribute-name column
    for (int i = 0; i < numCluster; i++) {
        for (int j = 0; j < centroid.numAttributes(); j++) {
            if (centroid.attribute(j).name().length() > maxAttWidth) {
                maxAttWidth = centroid.attribute(j).name().length();
            }
            if (centroid.attribute(j).isNumeric()) {
                // log10 of the magnitude approximates the number of integer digits
                double width = Math.log(Math.abs(centroid.instance(i).value(j))) / Math.log(10.0);
                if (width < 0) {
                    width = 1;
                }
                // decimal point, decimal places and one spare column
                width += 6.0;
                if ((int) width > maxWidth) {
                    maxWidth = (int) width;
                }
            }
        }
    }

    for (int i = 0; i < centroid.numAttributes(); i++) {
        if (centroid.attribute(i).isNominal()) {
            Attribute a = centroid.attribute(i);
            // labels actually used by the centroids go into the value columns
            for (int j = 0; j < centroid.numInstances(); j++) {
                String val = a.value((int) centroid.instance(j).value(i));
                if (val.length() > maxWidth) {
                    maxWidth = val.length();
                }
            }
            for (int j = 0; j < a.numValues(); j++) {
                String val = a.value(j) + " ";
                if (val.length() > maxAttWidth) {
                    maxAttWidth = val.length();
                }
            }
        }
    }

    // check for size of cluster sizes
    for (int i = 0; i < sizeEachCluster.length; i++) {
        String size = "(" + sizeEachCluster[i] + ")";
        if (size.length() > maxWidth) {
            maxWidth = size.length();
        }
    }

    maxAttWidth += 2;
    if (maxAttWidth < "Attribute".length() + 2) {
        maxAttWidth = "Attribute".length() + 2;
    }

    if (maxWidth < "Full Data".length()) {
        maxWidth = "Full Data".length() + 1;
    }

    if (maxWidth < "missing".length()) {
        maxWidth = "missing".length() + 1;
    }

    // ---- header ----
    // local, single-threaded buffer: no need for a synchronized StringBuffer
    StringBuilder temp = new StringBuilder();
    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + numIteration + "\n");

    if (distanceFunction instanceof EuclideanDistance) {
        temp.append("Within cluster sum of squared errors: " + Utils.sum(squaredError));
    } else {
        temp.append("Sum of within cluster distances: " + Utils.sum(squaredError));
    }

    temp.append("\n\nCluster centroid:\n");
    temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true));

    temp.append("\n");
    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));

    // cluster numbers
    for (int i = 0; i < numCluster; i++) {
        String clustNum = "" + i;
        temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true));
    }
    temp.append("\n");

    // cluster sizes (the attribute column stays blank on this line)
    String cSize = "";
    temp.append(pad(cSize, " ", maxAttWidth - cSize.length(), true));
    for (int i = 0; i < numCluster; i++) {
        cSize = "(" + sizeEachCluster[i] + ")";
        temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true));
    }
    temp.append("\n");

    // separator line spanning the attribute column and all centroid columns
    temp.append(
            pad("", "=", maxAttWidth + (maxWidth * (centroid.numInstances()) + centroid.numInstances()), true));
    temp.append("\n");

    // ---- one row per attribute ----
    for (int i = 0; i < centroid.numAttributes(); i++) {
        String attName = centroid.attribute(i).name();
        temp.append(attName);
        for (int j = 0; j < maxAttWidth - attName.length(); j++) {
            temp.append(" ");
        }

        String strVal;
        String valMeanMode;

        for (int j = 0; j < numCluster; j++) {
            if (centroid.instance(j).isMissing(i)) {
                // same text for nominal and numeric attributes
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else if (centroid.attribute(i).isNominal()) {
                valMeanMode = pad(
                        (strVal = centroid.attribute(i).value((int) centroid.instance(j).value(i))), " ",
                        maxWidth + 1 - strVal.length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = Utils.doubleToString(centroid.instance(j).value(i), maxWidth, 4).trim()),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
            temp.append(valMeanMode);
        }
        temp.append("\n");
    }

    temp.append("\n\n");
    return temp.toString();
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

/**
 * Returns a textual description of the clusterer as a table with one
 * column per cluster.
 *
 * <p>If {@code m_displayModelInOldFormat} is set, the result of
 * {@code toStringOriginal()} is returned instead. If {@code m_priors} is
 * {@code null}, a fixed "No clusterer built yet!" message is returned.
 *
 * <p>Otherwise the output consists of a header (cluster indices and the
 * cluster priors in parentheses), a separator line, and then one section
 * per attribute: numeric attributes get a "mean" and a "std. dev." row
 * read from {@code m_modelNormal[cluster][attribute][0]} and
 * {@code [1]}; all other attributes get one row of estimator counts per
 * attribute value followed by a "[total]" row.
 *
 * @return the formatted description, or a placeholder message when
 *         {@code m_priors} is {@code null}
 */
@Override
public String toString() {
    if (m_displayModelInOldFormat) {
        return toStringOriginal();
    }

    if (m_priors == null) {
        return "No clusterer built yet!";
    }

    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient.
    StringBuilder temp = new StringBuilder();
    temp.append("\nEM\n==\n");
    if (m_initialNumClusters == -1) {
        temp.append("\nNumber of clusters selected by cross validation: ").append(m_num_clusters).append("\n");
    } else {
        temp.append("\nNumber of clusters: ").append(m_num_clusters).append("\n");
    }

    int maxWidth = 0;    // width of a cluster (value) column
    int maxAttWidth = 0; // width of the leading attribute/label column

    // Label column: wide enough for every attribute name and for every
    // nominal value label plus two extra characters.
    for (int i = 0; i < m_num_attribs; i++) {
        Attribute a = m_theInstances.attribute(i);
        maxAttWidth = Math.max(maxAttWidth, a.name().length());
        if (a.isNominal()) {
            for (int j = 0; j < a.numValues(); j++) {
                String val = a.value(j) + "  ";
                maxAttWidth = Math.max(maxAttWidth, val.length());
            }
        }
    }

    // Value columns: wide enough for every number that will be printed.
    final double logOfTen = Math.log(10.0);
    for (int i = 0; i < m_num_clusters; i++) {
        for (int j = 0; j < m_num_attribs; j++) {
            if (m_theInstances.attribute(j).isNumeric()) {
                // Estimate the number of integer digits of mean and
                // std. dev. from their base-10 logarithms.
                double mean = Math.log(Math.abs(m_modelNormal[i][j][0])) / logOfTen;
                double stdD = Math.log(Math.abs(m_modelNormal[i][j][1])) / logOfTen;
                // Deliberately a ternary rather than Math.max: the two
                // differ when one operand is NaN.
                double width = (mean > stdD) ? mean : stdD;
                if (width < 0) {
                    width = 1;
                }
                // decimal + # decimal places + 1
                width += 6.0;
                maxWidth = Math.max(maxWidth, (int) width);
            } else {
                // nominal distributions
                DiscreteEstimator d = (DiscreteEstimator) m_model[i][j];
                for (int k = 0; k < d.getNumSymbols(); k++) {
                    String size = Utils.doubleToString(d.getCount(k), maxWidth, 4).trim();
                    maxWidth = Math.max(maxWidth, size.length());
                }
                int sum = Utils.doubleToString(d.getSumOfCounts(), maxWidth, 4).trim().length();
                maxWidth = Math.max(maxWidth, sum);
            }
        }
    }

    if (maxAttWidth < "Attribute".length()) {
        maxAttWidth = "Attribute".length();
    }

    maxAttWidth += 2;

    // NOTE(review): pad(...) is defined elsewhere in this class; its last
    // argument presumably selects which side the padding goes on - confirm
    // against its definition.
    temp.append("\n\n");
    temp.append(pad("Cluster", " ", (maxAttWidth + maxWidth + 1) - "Cluster".length(), true));

    temp.append("\n");
    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));

    // cluster #'s
    for (int i = 0; i < m_num_clusters; i++) {
        String classL = String.valueOf(i);
        temp.append(pad(classL, " ", maxWidth + 1 - classL.length(), true));
    }
    temp.append("\n");

    // cluster priors, shown in parentheses below the cluster numbers
    temp.append(pad("", " ", maxAttWidth, true));
    for (int i = 0; i < m_num_clusters; i++) {
        String priorP = "(" + Utils.doubleToString(m_priors[i], maxWidth, 2).trim() + ")";
        temp.append(pad(priorP, " ", maxWidth + 1 - priorP.length(), true));
    }

    // separator line spanning the label column and all cluster columns
    temp.append("\n");
    temp.append(pad("", "=", maxAttWidth + (maxWidth * m_num_clusters) + m_num_clusters + 1, true));
    temp.append("\n");

    for (int i = 0; i < m_num_attribs; i++) {
        Attribute a = m_theInstances.attribute(i);
        temp.append(a.name()).append("\n");

        if (a.isNumeric()) {
            // means
            String meanL = "  mean";
            temp.append(pad(meanL, " ", maxAttWidth + 1 - meanL.length(), false));
            for (int j = 0; j < m_num_clusters; j++) {
                String mean = Utils.doubleToString(m_modelNormal[j][i][0], maxWidth, 4).trim();
                temp.append(pad(mean, " ", maxWidth + 1 - mean.length(), true));
            }
            temp.append("\n");

            // std deviations
            String stdDevL = "  std. dev.";
            temp.append(pad(stdDevL, " ", maxAttWidth + 1 - stdDevL.length(), false));
            for (int j = 0; j < m_num_clusters; j++) {
                String stdDev = Utils.doubleToString(m_modelNormal[j][i][1], maxWidth, 4).trim();
                temp.append(pad(stdDev, " ", maxWidth + 1 - stdDev.length(), true));
            }
            temp.append("\n\n");
        } else {
            // one row of per-cluster counts for each attribute value
            for (int j = 0; j < a.numValues(); j++) {
                String val = "  " + a.value(j);
                temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false));
                for (int k = 0; k < m_num_clusters; k++) {
                    DiscreteEstimator d = (DiscreteEstimator) m_model[k][i];
                    String count = Utils.doubleToString(d.getCount(j), maxWidth, 4).trim();
                    temp.append(pad(count, " ", maxWidth + 1 - count.length(), true));
                }
                temp.append("\n");
            }

            // the totals
            String total = "  [total]";
            temp.append(pad(total, " ", maxAttWidth + 1 - total.length(), false));
            for (int k = 0; k < m_num_clusters; k++) {
                DiscreteEstimator d = (DiscreteEstimator) m_model[k][i];
                String count = Utils.doubleToString(d.getSumOfCounts(), maxWidth, 4).trim();
                temp.append(pad(count, " ", maxWidth + 1 - count.length(), true));
            }
            temp.append("\n");
        }
    }

    return temp.toString();
}