List of usage examples for weka.core Attribute numValues
public final int numValues()
From source file:GrowTree.java
/**
 * Finds the attribute whose split of {@code D} has the lowest impurity.
 *
 * <p>For every non-class attribute that has a value set, the instances are
 * partitioned by attribute value and the impurity of the partition is taken
 * as the size-weighted average of {@code imp(subset)} over the non-empty
 * subsets. The attribute with the smallest such impurity (strictly below
 * the initial bound of 1.0) wins.
 *
 * <p>Differences from the previous implementation, all of them fixes:
 * <ul>
 *   <li>attributes without enumerable values ({@code numValues() == 0},
 *       e.g. numeric ones) are skipped instead of indexing into an empty
 *       array;</li>
 *   <li>instances whose value for the attribute is missing are left out of
 *       the split instead of being cast into bucket 0;</li>
 *   <li>empty subsets no longer take part in the comparison, and a single
 *       pure subset can no longer make an otherwise poor attribute win;</li>
 *   <li>{@code imp} is evaluated once per subset.</li>
 * </ul>
 *
 * @param D the data to find the best split for
 * @return the attribute with the lowest weighted impurity, or {@code null}
 *         if no attribute yields an impurity below 1.0
 */
Attribute bestSplit(Instances D) {
    double imin = 1.0;
    Attribute fbest = null;

    for (int attIndex = 0; attIndex < D.numAttributes(); attIndex++) {
        // the attribute enumeration used previously skipped the class attribute
        if (attIndex == D.classIndex()) {
            continue;
        }
        Attribute a = D.attribute(attIndex);
        int numValues = a.numValues();
        if (numValues == 0) {
            // nothing to split on (numeric/date attributes report 0 values)
            continue;
        }

        // split D into subsets d1..dn, one per value of the attribute
        Instances[] split = new Instances[numValues];
        for (int i = 0; i < numValues; i++) {
            split[i] = new Instances(D, 0);
        }
        int known = 0;
        for (int n = 0; n < D.numInstances(); n++) {
            Instance in = D.instance(n);
            if (in.isMissing(a)) {
                continue;
            }
            split[(int) in.value(a)].add(in);
            known++;
        }
        if (known == 0) {
            continue;
        }

        // impurity of the whole split: subsets weighted by their share of the data
        double weighted = 0.0;
        for (int i = 0; i < numValues; i++) {
            int size = split[i].numInstances();
            if (size > 0) {
                weighted += ((double) size / known) * imp(split[i]);
            }
        }

        if (weighted < imin) {
            imin = weighted;
            fbest = a;
        }
    }
    return fbest;
}
From source file:ID3Chi.java
License:Open Source License
/** * Computes Chi-Square function for an attribute. * * @param data/*from www .j av a 2 s . c o m*/ * the data for which info gain is to be computed * @param att * the attribute * @return the chi-square for the given attribute and data * @throws Exception * if computation fails */ private double computeChiSquare(Instances data, Attribute att) throws Exception { double chiSquare = 0; double[] classCounts = GetClassCounts(data); Instances[] subset = splitData(data, att); for (int j = 0; j < att.numValues(); j++) { if (subset[j].numInstances() > 0) { chiSquare += computeChiSquareForSubset(subset[j], att, classCounts, data.numInstances()); } } return chiSquare; }
From source file:ID3Chi.java
License:Open Source License
/**
 * Computes information gain for an attribute.
 *
 * <p>The split produced by {@code splitData} is indexed by attribute value;
 * the slot at index {@code att.numValues()} is read here as the subset of
 * instances with an unknown value. Each non-empty value subset contributes
 * its entropy (as computed by {@code computeEntropyWithUnknowns}) weighted
 * by its own size plus a proportional share of the unknown instances.
 *
 * @param data the data for which info gain is to be computed
 * @param att the attribute
 * @param entropyOfAllData entropy of data set
 * @return the information gain for the given attribute and data; 0 if every
 *         instance has an unknown value for the attribute
 * @throws Exception if computation fails
 */
private double computeInfoGain(Instances data, Attribute att, double entropyOfAllData) throws Exception {
    Instances[] subset = splitData(data, att);
    Instances unknown = subset[att.numValues()];
    int numUnknown = unknown.numInstances();
    if (numUnknown == data.numInstances()) {
        return 0;
    }

    double[] classCountsUnknownData = GetClassCounts(unknown);
    double infoGain = entropyOfAllData;
    for (int j = 0; j < att.numValues(); j++) {
        Instances part = subset[j];
        if (part.numInstances() <= 0) {
            continue;
        }
        double ratio = (double) part.numInstances() / (double) data.numInstances();
        double weight = ((double) part.numInstances() + (double) numUnknown * ratio)
                / (double) data.numInstances();
        double entropy = computeEntropyWithUnknowns(part, unknown, classCountsUnknownData, ratio);
        infoGain -= weight * entropy;
    }
    return infoGain;
}
From source file:ID3Chi.java
License:Open Source License
/** * Splits a dataset according to the values of a nominal attribute. * * @param data//from w ww. j a va2 s. c om * the data which is to be split * @param att * the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { // [att.numValues()] is location for "unknown" values Instances[] subset = new Instances[att.numValues() + 1]; for (int j = 0; j <= att.numValues(); j++) { subset[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) { subset[att.numValues()].add(inst); } else { subset[(int) inst.value(att)].add(inst); } } for (int i = 0; i < subset.length; i++) { subset[i].compactify(); } return subset; }
From source file:adams.flow.sink.WekaCostBenefitAnalysis.java
License:Open Source License
/** * Plots the token (the panel and dialog have already been created at * this stage).// ww w. j av a2s.c o m * * @param token the token to display */ @Override protected void display(Token token) { Evaluation eval; Attribute classAtt; Attribute classAttToUse; int classValue; ThresholdCurve tc; Instances result; ArrayList<String> newNames; CostBenefitAnalysis cbAnalysis; PlotData2D tempd; boolean[] cp; int n; try { if (token.getPayload() instanceof WekaEvaluationContainer) eval = (Evaluation) ((WekaEvaluationContainer) token.getPayload()) .getValue(WekaEvaluationContainer.VALUE_EVALUATION); else eval = (Evaluation) token.getPayload(); if (eval.predictions() == null) { getLogger().severe("No predictions available from Evaluation object!"); return; } classAtt = eval.getHeader().classAttribute(); m_ClassIndex.setData(classAtt); classValue = m_ClassIndex.getIntIndex(); tc = new ThresholdCurve(); result = tc.getCurve(eval.predictions(), classValue); // Create a dummy class attribute with the chosen // class value as index 0 (if necessary). classAttToUse = eval.getHeader().classAttribute(); if (classValue != 0) { newNames = new ArrayList<>(); newNames.add(classAtt.value(classValue)); for (int k = 0; k < classAtt.numValues(); k++) { if (k != classValue) newNames.add(classAtt.value(k)); } classAttToUse = new Attribute(classAtt.name(), newNames); } // assemble plot data tempd = new PlotData2D(result); tempd.setPlotName(result.relationName()); tempd.m_alwaysDisplayPointsOfThisSize = 10; // specify which points are connected cp = new boolean[result.numInstances()]; for (n = 1; n < cp.length; n++) cp[n] = true; tempd.setConnectPoints(cp); // add plot m_CostBenefitPanel.setCurveData(tempd, classAttToUse); } catch (Exception e) { handleException("Failed to display token: " + token, e); } }
From source file:adams.flow.sink.WekaCostBenefitAnalysis.java
License:Open Source License
/** * Creates a new panel for the token.//from w w w . j a v a 2s . co m * * @param token the token to display in a new panel, can be null * @return the generated panel */ public AbstractDisplayPanel createDisplayPanel(Token token) { AbstractDisplayPanel result; String name; if (token != null) name = "Cost curve (" + getEvaluation(token).getHeader().relationName() + ")"; else name = "Cost curve"; result = new AbstractComponentDisplayPanel(name) { private static final long serialVersionUID = -3513994354297811163L; protected CostBenefitAnalysis m_VisualizePanel; @Override protected void initGUI() { super.initGUI(); setLayout(new BorderLayout()); m_VisualizePanel = new CostBenefitAnalysis(); add(m_VisualizePanel, BorderLayout.CENTER); } @Override public void display(Token token) { try { Evaluation eval = getEvaluation(token); Attribute classAtt = eval.getHeader().classAttribute(); m_ClassIndex.setData(classAtt); int classValue = m_ClassIndex.getIntIndex(); ThresholdCurve tc = new ThresholdCurve(); Instances result = tc.getCurve(eval.predictions(), classValue); // Create a dummy class attribute with the chosen // class value as index 0 (if necessary). 
Attribute classAttToUse = eval.getHeader().classAttribute(); if (classValue != 0) { ArrayList<String> newNames = new ArrayList<>(); newNames.add(classAtt.value(classValue)); for (int k = 0; k < classAtt.numValues(); k++) { if (k != classValue) newNames.add(classAtt.value(k)); } classAttToUse = new Attribute(classAtt.name(), newNames); } // assemble plot data PlotData2D tempd = new PlotData2D(result); tempd.setPlotName(result.relationName()); tempd.m_alwaysDisplayPointsOfThisSize = 10; // specify which points are connected boolean[] cp = new boolean[result.numInstances()]; for (int n = 1; n < cp.length; n++) cp[n] = true; tempd.setConnectPoints(cp); // add plot m_VisualizePanel.setCurveData(tempd, classAttToUse); } catch (Exception e) { getLogger().log(Level.SEVERE, "Failed to display token: " + token, e); } } @Override public JComponent supplyComponent() { return m_VisualizePanel; } @Override public void clearPanel() { } public void cleanUp() { } }; if (token != null) result.display(token); return result; }
From source file:adams.flow.transformer.WekaReorderAttributesToReference.java
License:Open Source License
/** * Executes the flow item.//from ww w . j av a 2 s .c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances dataOld; Instance instOld; Instances dataNew; Instance instNew; Attribute att; int i; StringBuilder order; List<Add> adds; Add add; int index; StringBuilder labels; int n; List<Filter> filters; Reorder reorder; result = null; if (m_OnTheFly && (m_Reference == null)) { result = setUpReference(); if (result != null) return result; } dataNew = null; instNew = null; // get input data if (m_InputToken.getPayload() instanceof Instance) { instOld = (Instance) m_InputToken.getPayload(); dataOld = instOld.dataset(); } else { instOld = null; dataOld = (Instances) m_InputToken.getPayload(); } // do we need to initialize filter? if (m_InitializeOnce || (m_Reorder == null)) { // check incoming data if (!m_Lenient) { for (i = 0; i < m_Reference.numAttributes(); i++) { att = m_Reference.attribute(i); if (dataOld.attribute(att.name()) == null) { if (result == null) result = "Missing attribute(s) in incoming data: " + att.name(); else result += ", " + att.name(); } } if (result != null) getLogger().severe(result); } if (result == null) { try { // determine indices order = new StringBuilder(); adds = new ArrayList<Add>(); for (i = 0; i < m_Reference.numAttributes(); i++) { att = m_Reference.attribute(i); if (dataOld.attribute(att.name()) == null) { index = dataOld.numAttributes() + adds.size(); add = new Add(); add.setAttributeIndex("last"); add.setAttributeName(att.name()); add.setAttributeType(new SelectedTag(att.type(), Add.TAGS_TYPE)); if (att.isNominal()) { labels = new StringBuilder(); for (n = 0; n < att.numValues(); n++) { if (labels.length() > 0) labels.append(","); labels.append(att.value(n)); } add.setNominalLabels(labels.toString()); } adds.add(add); } else { index = dataOld.attribute(att.name()).index(); } if (order.length() > 0) order.append(","); order.append((index + 1)); } // 
build reorder filter reorder = new Reorder(); reorder.setAttributeIndices(order.toString()); // build multifilter filters = new ArrayList<Filter>(); filters.addAll(adds); filters.add(reorder); m_Reorder = new MultiFilter(); m_Reorder.setFilters(filters.toArray(new Filter[filters.size()])); // initialize filter m_Reorder.setInputFormat(dataOld); } catch (Exception e) { result = handleException("Failed to initialize reorder filter!", e); } } } // reorder data if (result == null) { try { if (instOld != null) { m_Reorder.input(instOld); m_Reorder.batchFinished(); instNew = m_Reorder.output(); if (m_KeepRelationName) instNew.dataset().setRelationName(dataOld.relationName()); } else { dataNew = Filter.useFilter(dataOld, m_Reorder); if (m_KeepRelationName) dataNew.setRelationName(dataOld.relationName()); } } catch (Exception e) { result = handleException("Failed to reorder data!", e); instNew = null; dataNew = null; } } if (instNew != null) m_OutputToken = new Token(instNew); else if (dataNew != null) m_OutputToken = new Token(dataNew); return result; }
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/** * return a string describing this clusterer * /*www. ja v a2 s. c o m*/ * @return a description of the clusterer as a string */ @Override public String toString() { if (m_ClusterCentroids == null) { return "No clusterer built yet!"; } int maxWidth = 0; int maxAttWidth = 0; boolean containsNumeric = false; for (int i = 0; i < m_NumClusters; i++) { for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) { if (m_ClusterCentroids.attribute(j).name().length() > maxAttWidth) { maxAttWidth = m_ClusterCentroids.attribute(j).name().length(); } if (m_ClusterCentroids.attribute(j).isNumeric()) { containsNumeric = true; double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) / Math.log(10.0); // System.err.println(m_ClusterCentroids.instance(i).value(j)+" "+width); if (width < 0) { width = 1; } // decimal + # decimal places + 1 width += 6.0; if ((int) width > maxWidth) { maxWidth = (int) width; } } } } for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) { if (m_ClusterCentroids.attribute(i).isNominal()) { Attribute a = m_ClusterCentroids.attribute(i); for (int j = 0; j < m_ClusterCentroids.numInstances(); j++) { String val = a.value((int) m_ClusterCentroids.instance(j).value(i)); if (val.length() > maxWidth) { maxWidth = val.length(); } } for (int j = 0; j < a.numValues(); j++) { String val = a.value(j) + " "; if (val.length() > maxAttWidth) { maxAttWidth = val.length(); } } } } if (m_displayStdDevs) { // check for maximum width of maximum frequency count for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) { if (m_ClusterCentroids.attribute(i).isNominal()) { int maxV = Utils.maxIndex(m_FullNominalCounts[i]); /* * int percent = (int)((double)m_FullNominalCounts[i][maxV] / * Utils.sum(m_ClusterSizes) * 100.0); */ int percent = 6; // max percent width (100%) String nomV = "" + m_FullNominalCounts[i][maxV]; // + " (" + percent + "%)"; if (nomV.length() + percent > maxWidth) { maxWidth = nomV.length() + 1; } } } } // check for size of 
cluster sizes for (int m_ClusterSize : m_ClusterSizes) { String size = "(" + m_ClusterSize + ")"; if (size.length() > maxWidth) { maxWidth = size.length(); } } if (m_displayStdDevs && maxAttWidth < "missing".length()) { maxAttWidth = "missing".length(); } String plusMinus = "+/-"; maxAttWidth += 2; if (m_displayStdDevs && containsNumeric) { maxWidth += plusMinus.length(); } if (maxAttWidth < "Attribute".length() + 2) { maxAttWidth = "Attribute".length() + 2; } if (maxWidth < "Full Data".length()) { maxWidth = "Full Data".length() + 1; } if (maxWidth < "missing".length()) { maxWidth = "missing".length() + 1; } StringBuffer temp = new StringBuffer(); // String naString = "N/A"; /* * for (int i = 0; i < maxWidth+2; i++) { naString += " "; } */ temp.append("\nkMeans\n======\n"); temp.append("\nNumber of iterations: " + m_Iterations + "\n"); if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir) { temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors)); } else { temp.append("Sum of within cluster distances: " + Utils.sum(m_squaredErrors)); } if (!m_dontReplaceMissing) { temp.append("\nMissing values globally replaced with mean/mode"); } temp.append("\n\nCluster centroids:\n"); temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true)); temp.append("\n"); temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false)); temp.append(pad("Full Data", " ", maxWidth + 1 - "Full Data".length(), true)); // cluster numbers for (int i = 0; i < m_NumClusters; i++) { String clustNum = "" + i; temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true)); } temp.append("\n"); // cluster sizes String cSize = "(" + 
Utils.sum(m_ClusterSizes) + ")"; temp.append(pad(cSize, " ", maxAttWidth + maxWidth + 1 - cSize.length(), true)); for (int i = 0; i < m_NumClusters; i++) { cSize = "(" + m_ClusterSizes[i] + ")"; temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true)); } temp.append("\n"); temp.append(pad("", "=", maxAttWidth + (maxWidth * (m_ClusterCentroids.numInstances() + 1) + m_ClusterCentroids.numInstances() + 1), true)); temp.append("\n"); for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) { String attName = m_ClusterCentroids.attribute(i).name(); temp.append(attName); for (int j = 0; j < maxAttWidth - attName.length(); j++) { temp.append(" "); } String strVal; String valMeanMode; // full data if (m_ClusterCentroids.attribute(i).isNominal()) { if (m_FullMeansOrMediansOrModes[i] == -1) { // missing valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad( (strVal = m_ClusterCentroids.attribute(i).value((int) m_FullMeansOrMediansOrModes[i])), " ", maxWidth + 1 - strVal.length(), true); } } else { if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad( (strVal = Utils.doubleToString(m_FullMeansOrMediansOrModes[i], maxWidth, 4).trim()), " ", maxWidth + 1 - strVal.length(), true); } } temp.append(valMeanMode); for (int j = 0; j < m_NumClusters; j++) { if (m_ClusterCentroids.attribute(i).isNominal()) { if (m_ClusterCentroids.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad( (strVal = m_ClusterCentroids.attribute(i) .value((int) m_ClusterCentroids.instance(j).value(i))), " ", maxWidth + 1 - strVal.length(), true); } } else { if (m_ClusterCentroids.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad((strVal = Utils 
.doubleToString(m_ClusterCentroids.instance(j).value(i), maxWidth, 4).trim()), " ", maxWidth + 1 - strVal.length(), true); } } temp.append(valMeanMode); } temp.append("\n"); if (m_displayStdDevs) { // Std devs/max nominal String stdDevVal = ""; if (m_ClusterCentroids.attribute(i).isNominal()) { // Do the values of the nominal attribute Attribute a = m_ClusterCentroids.attribute(i); for (int j = 0; j < a.numValues(); j++) { // full data String val = " " + a.value(j); temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false)); int count = m_FullNominalCounts[i][j]; int percent = (int) ((double) m_FullNominalCounts[i][j] / Utils.sum(m_ClusterSizes) * 100.0); String percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); // Clusters for (int k = 0; k < m_NumClusters; k++) { count = m_ClusterNominalCounts[k][i][j]; percent = (int) ((double) m_ClusterNominalCounts[k][i][j] / m_ClusterSizes[k] * 100.0); percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); } temp.append("\n"); } // missing (if any) if (m_FullMissingCounts[i] > 0) { // Full data temp.append(pad(" missing", " ", maxAttWidth + 1 - " missing".length(), false)); int count = m_FullMissingCounts[i]; int percent = (int) ((double) m_FullMissingCounts[i] / Utils.sum(m_ClusterSizes) * 100.0); String percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); // Clusters for (int k = 0; k < m_NumClusters; k++) { count = m_ClusterMissingCounts[k][i]; percent = (int) ((double) 
m_ClusterMissingCounts[k][i] / m_ClusterSizes[k] * 100.0); percentS = "" + percent + "%)"; percentS = pad(percentS, " ", 5 - percentS.length(), true); stdDevVal = "" + count + " (" + percentS; stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true); temp.append(stdDevVal); } temp.append("\n"); } temp.append("\n"); } else { // Full data if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) { stdDevVal = pad("--", " ", maxAttWidth + maxWidth + 1 - 2, true); } else { stdDevVal = pad( (strVal = plusMinus + Utils.doubleToString(m_FullStdDevs[i], maxWidth, 4).trim()), " ", maxWidth + maxAttWidth + 1 - strVal.length(), true); } temp.append(stdDevVal); // Clusters for (int j = 0; j < m_NumClusters; j++) { if (m_ClusterCentroids.instance(j).isMissing(i)) { stdDevVal = pad("--", " ", maxWidth + 1 - 2, true); } else { stdDevVal = pad((strVal = plusMinus + Utils .doubleToString(m_ClusterStdDevs.instance(j).value(i), maxWidth, 4).trim()), " ", maxWidth + 1 - strVal.length(), true); } temp.append(stdDevVal); } temp.append("\n\n"); } } } temp.append("\n\n"); return temp.toString(); }
From source file:aw_cluster.myKMeans.java
@Override public String toString() { if (centroid == null) { return "No clusterer built yet!"; }/*from ww w . j a v a 2s . c o m*/ int maxWidth = 0; int maxAttWidth = 0; boolean containsNumeric = false; for (int i = 0; i < numCluster; i++) { for (int j = 0; j < centroid.numAttributes(); j++) { if (centroid.attribute(j).name().length() > maxAttWidth) { maxAttWidth = centroid.attribute(j).name().length(); } if (centroid.attribute(j).isNumeric()) { containsNumeric = true; double width = Math.log(Math.abs(centroid.instance(i).value(j))) / Math.log(10.0); if (width < 0) { width = 1; } width += 6.0; if ((int) width > maxWidth) { maxWidth = (int) width; } } } } for (int i = 0; i < centroid.numAttributes(); i++) { if (centroid.attribute(i).isNominal()) { Attribute a = centroid.attribute(i); for (int j = 0; j < centroid.numInstances(); j++) { String val = a.value((int) centroid.instance(j).value(i)); if (val.length() > maxWidth) { maxWidth = val.length(); } } for (int j = 0; j < a.numValues(); j++) { String val = a.value(j) + " "; if (val.length() > maxAttWidth) { maxAttWidth = val.length(); } } } } // check for size of cluster sizes for (int i = 0; i < sizeEachCluster.length; i++) { String size = "(" + sizeEachCluster[i] + ")"; if (size.length() > maxWidth) { maxWidth = size.length(); } } String plusMinus = "+/-"; maxAttWidth += 2; if (maxAttWidth < "Attribute".length() + 2) { maxAttWidth = "Attribute".length() + 2; } if (maxWidth < "Full Data".length()) { maxWidth = "Full Data".length() + 1; } if (maxWidth < "missing".length()) { maxWidth = "missing".length() + 1; } StringBuffer temp = new StringBuffer(); temp.append("\nkMeans\n======\n"); temp.append("\nNumber of iterations: " + numIteration + "\n"); if (distanceFunction instanceof EuclideanDistance) { temp.append("Within cluster sum of squared errors: " + Utils.sum(squaredError)); } else { temp.append("Sum of within cluster distances: " + Utils.sum(squaredError)); } temp.append("\n\nCluster centroid:\n"); 
temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true)); temp.append("\n"); temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false)); // cluster numbers for (int i = 0; i < numCluster; i++) { String clustNum = "" + i; temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true)); } temp.append("\n"); // cluster sizes String cSize = ""; temp.append(pad(cSize, " ", maxAttWidth - cSize.length(), true)); for (int i = 0; i < numCluster; i++) { cSize = "(" + sizeEachCluster[i] + ")"; temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true)); } temp.append("\n"); temp.append( pad("", "=", maxAttWidth + (maxWidth * (centroid.numInstances()) + centroid.numInstances()), true)); temp.append("\n"); for (int i = 0; i < centroid.numAttributes(); i++) { String attName = centroid.attribute(i).name(); temp.append(attName); for (int j = 0; j < maxAttWidth - attName.length(); j++) { temp.append(" "); } String strVal; String valMeanMode; for (int j = 0; j < numCluster; j++) { if (centroid.attribute(i).isNominal()) { if (centroid.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad( (strVal = centroid.attribute(i).value((int) centroid.instance(j).value(i))), " ", maxWidth + 1 - strVal.length(), true); } } else { if (centroid.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad( (strVal = Utils.doubleToString(centroid.instance(j).value(i), maxWidth, 4).trim()), " ", maxWidth + 1 - strVal.length(), true); } } temp.append(valMeanMode); } temp.append("\n"); } temp.append("\n\n"); return temp.toString(); }
From source file:br.ufrn.ia.core.clustering.EMIaProject.java
License:Open Source License
public String toString() { if (m_displayModelInOldFormat) { return toStringOriginal(); }//from www. j ava 2s .c om if (m_priors == null) { return "No clusterer built yet!"; } StringBuffer temp = new StringBuffer(); temp.append("\nEM\n==\n"); if (m_initialNumClusters == -1) { temp.append("\nNumber of clusters selected by cross validation: " + m_num_clusters + "\n"); } else { temp.append("\nNumber of clusters: " + m_num_clusters + "\n"); } int maxWidth = 0; int maxAttWidth = 0; boolean containsKernel = false; // set up max widths // attributes for (int i = 0; i < m_num_attribs; i++) { Attribute a = m_theInstances.attribute(i); if (a.name().length() > maxAttWidth) { maxAttWidth = m_theInstances.attribute(i).name().length(); } if (a.isNominal()) { // check values for (int j = 0; j < a.numValues(); j++) { String val = a.value(j) + " "; if (val.length() > maxAttWidth) { maxAttWidth = val.length(); } } } } for (int i = 0; i < m_num_clusters; i++) { for (int j = 0; j < m_num_attribs; j++) { if (m_theInstances.attribute(j).isNumeric()) { // check mean and std. dev. against maxWidth double mean = Math.log(Math.abs(m_modelNormal[i][j][0])) / Math.log(10.0); double stdD = Math.log(Math.abs(m_modelNormal[i][j][1])) / Math.log(10.0); double width = (mean > stdD) ? 
mean : stdD; if (width < 0) { width = 1; } // decimal + # decimal places + 1 width += 6.0; if ((int) width > maxWidth) { maxWidth = (int) width; } } else { // nominal distributions DiscreteEstimator d = (DiscreteEstimator) m_model[i][j]; for (int k = 0; k < d.getNumSymbols(); k++) { String size = Utils.doubleToString(d.getCount(k), maxWidth, 4).trim(); if (size.length() > maxWidth) { maxWidth = size.length(); } } int sum = Utils.doubleToString(d.getSumOfCounts(), maxWidth, 4).trim().length(); if (sum > maxWidth) { maxWidth = sum; } } } } if (maxAttWidth < "Attribute".length()) { maxAttWidth = "Attribute".length(); } maxAttWidth += 2; temp.append("\n\n"); temp.append(pad("Cluster", " ", (maxAttWidth + maxWidth + 1) - "Cluster".length(), true)); temp.append("\n"); temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false)); // cluster #'s for (int i = 0; i < m_num_clusters; i++) { String classL = "" + i; temp.append(pad(classL, " ", maxWidth + 1 - classL.length(), true)); } temp.append("\n"); // cluster priors temp.append(pad("", " ", maxAttWidth, true)); for (int i = 0; i < m_num_clusters; i++) { String priorP = Utils.doubleToString(m_priors[i], maxWidth, 2).trim(); priorP = "(" + priorP + ")"; temp.append(pad(priorP, " ", maxWidth + 1 - priorP.length(), true)); } temp.append("\n"); temp.append(pad("", "=", maxAttWidth + (maxWidth * m_num_clusters) + m_num_clusters + 1, true)); temp.append("\n"); for (int i = 0; i < m_num_attribs; i++) { String attName = m_theInstances.attribute(i).name(); temp.append(attName + "\n"); if (m_theInstances.attribute(i).isNumeric()) { String meanL = " mean"; temp.append(pad(meanL, " ", maxAttWidth + 1 - meanL.length(), false)); for (int j = 0; j < m_num_clusters; j++) { // means String mean = Utils.doubleToString(m_modelNormal[j][i][0], maxWidth, 4).trim(); temp.append(pad(mean, " ", maxWidth + 1 - mean.length(), true)); } temp.append("\n"); // now do std deviations String stdDevL = " std. 
dev."; temp.append(pad(stdDevL, " ", maxAttWidth + 1 - stdDevL.length(), false)); for (int j = 0; j < m_num_clusters; j++) { String stdDev = Utils.doubleToString(m_modelNormal[j][i][1], maxWidth, 4).trim(); temp.append(pad(stdDev, " ", maxWidth + 1 - stdDev.length(), true)); } temp.append("\n\n"); } else { Attribute a = m_theInstances.attribute(i); for (int j = 0; j < a.numValues(); j++) { String val = " " + a.value(j); temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false)); for (int k = 0; k < m_num_clusters; k++) { DiscreteEstimator d = (DiscreteEstimator) m_model[k][i]; String count = Utils.doubleToString(d.getCount(j), maxWidth, 4).trim(); temp.append(pad(count, " ", maxWidth + 1 - count.length(), true)); } temp.append("\n"); } // do the totals String total = " [total]"; temp.append(pad(total, " ", maxAttWidth + 1 - total.length(), false)); for (int k = 0; k < m_num_clusters; k++) { DiscreteEstimator d = (DiscreteEstimator) m_model[k][i]; String count = Utils.doubleToString(d.getSumOfCounts(), maxWidth, 4).trim(); temp.append(pad(count, " ", maxWidth + 1 - count.length(), true)); } temp.append("\n"); } } return temp.toString(); }