List of usage examples for weka.core Utils sum
public static int sum(int[] ints)
From source file:Bilbo.java
License:Open Source License
/** * Bagging method./*from w w w . jav a 2 s . com*/ * * @param data the training data to be used for generating the * bagged classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // Has user asked to represent copies using weights? if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) { throw new IllegalArgumentException("Cannot represent copies using weights when " + "base learner in bagging does not implement " + "WeightedInstancesHandler."); } // get fresh Instances object m_data = new Instances(data); m_unlabeledData = new Instances(p_unlabeledData); super.buildClassifier(m_data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } m_random = new Random(m_Seed); m_inBag = null; if (m_CalcOutOfBag) m_inBag = new boolean[m_Classifiers.length][]; for (int j = 0; j < m_Classifiers.length; j++) { if (m_Classifier instanceof Randomizable) { ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt()); } } //Insert oracle loop here TODO buildClassifiers(); Instances inst = new Instances(m_data); for (int i = 0; i < m_Classifiers.length; i++) { inst.clear(); ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst); ((NewTree) m_Classifiers[i]).DoInduction(inst); // Ehm, do something boyski } // calc OOB error? 
if (getCalcOutOfBag()) { double outOfBagCount = 0.0; double errorSum = 0.0; boolean numeric = m_data.classAttribute().isNumeric(); for (int i = 0; i < m_data.numInstances(); i++) { double vote; double[] votes; if (numeric) votes = new double[1]; else votes = new double[m_data.numClasses()]; // determine predictions for instance int voteCount = 0; for (int j = 0; j < m_Classifiers.length; j++) { if (m_inBag[j][i]) continue; if (numeric) { double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i)); if (!Utils.isMissingValue(pred)) { votes[0] += pred; voteCount++; } } else { voteCount++; double[] newProbs = ((NewTree) m_Classifiers[j]) .distributionForInstance(m_data.instance(i)); // average the probability estimates for (int k = 0; k < newProbs.length; k++) { votes[k] += newProbs[k]; } } } // "vote" if (numeric) { if (voteCount == 0) { vote = Utils.missingValue(); } else { vote = votes[0] / voteCount; // average } } else { if (Utils.eq(Utils.sum(votes), 0)) { vote = Utils.missingValue(); } else { vote = Utils.maxIndex(votes); // predicted class Utils.normalize(votes); } } // error for instance if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) { outOfBagCount += m_data.instance(i).weight(); if (numeric) { errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue()) * m_data.instance(i).weight()) / m_data.instance(i).classValue(); } else { if (vote != m_data.instance(i).classValue()) errorSum += m_data.instance(i).weight(); } } } if (outOfBagCount > 0) { m_OutOfBagError = errorSum / outOfBagCount; } } else { m_OutOfBagError = 0; } // save memory m_data = null; }
From source file:Bilbo.java
License:Open Source License
/** * Calculates the class membership probabilities for the given test * instance./*from w ww.jav a2s. co m*/ * * @param instance the instance to be classified * @return preedicted class probability distribution * @throws Exception if distribution can't be computed successfully */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] sums = new double[instance.numClasses()], newProbs; double numPreds = 0; for (int i = 0; i < m_NumIterations; i++) { if (instance.classAttribute().isNumeric() == true) { double pred = ((NewTree) m_Classifiers[i]).classifyInstance(instance); if (!Utils.isMissingValue(pred)) { sums[0] += pred; numPreds++; } } else { newProbs = ((NewTree) m_Classifiers[i]).distributionForInstance(instance); for (int j = 0; j < newProbs.length; j++) sums[j] += newProbs[j]; } } if (instance.classAttribute().isNumeric() == true) { if (numPreds == 0) { sums[0] = Utils.missingValue(); } else { sums[0] /= numPreds; } return sums; } else if (Utils.eq(Utils.sum(sums), 0)) { return sums; } else { Utils.normalize(sums); return sums; } }
From source file:PrincipalComponents.java
License:Open Source License
private void buildAttributeConstructor(Instances data) throws Exception { m_eigenvalues = null;//w w w . j a v a 2s . c o m m_outputNumAtts = -1; m_attributeFilter = null; m_nominalToBinFilter = null; m_sumOfEigenValues = 0.0; m_trainInstances = new Instances(data); // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_trainHeader = new Instances(m_trainInstances, 0); m_replaceMissingFilter = new ReplaceMissingValues(); m_replaceMissingFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter); /* * if (m_normalize) { m_normalizeFilter = new Normalize(); * m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances * = Filter.useFilter(m_trainInstances, m_normalizeFilter); } */ m_nominalToBinFilter = new NominalToBinary(); m_nominalToBinFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter); // delete any attributes with only one distinct value or are all missing Vector<Integer> deleteCols = new Vector<Integer>(); for (int i = 0; i < m_trainInstances.numAttributes(); i++) { if (m_trainInstances.numDistinctValues(i) <= 1) { deleteCols.addElement(new Integer(i)); } } if (m_trainInstances.classIndex() >= 0) { // get rid of the class column m_hasClass = true; m_classIndex = m_trainInstances.classIndex(); deleteCols.addElement(new Integer(m_classIndex)); } // remove columns from the data if necessary if (deleteCols.size() > 0) { m_attributeFilter = new Remove(); int[] todelete = new int[deleteCols.size()]; for (int i = 0; i < deleteCols.size(); i++) { todelete[i] = (deleteCols.elementAt(i)).intValue(); } m_attributeFilter.setAttributeIndicesArray(todelete); m_attributeFilter.setInvertSelection(false); m_attributeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter); } // can evaluator handle the processed data ? 
e.g., enough attributes? getCapabilities().testWithFail(m_trainInstances); m_numInstances = m_trainInstances.numInstances(); m_numAttribs = m_trainInstances.numAttributes(); fillCovariance(); SymmDenseEVD evd = SymmDenseEVD.factorize(m_correlation); m_eigenvectors = Matrices.getArray(evd.getEigenvectors()); m_eigenvalues = evd.getEigenvalues(); /* * for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < * m_numAttribs; j++) { System.err.println(v[i][j] + " "); } * System.err.println(d[i]); } */ // any eigenvalues less than 0 are not worth anything --- change to 0 for (int i = 0; i < m_eigenvalues.length; i++) { if (m_eigenvalues[i] < 0) { m_eigenvalues[i] = 0.0; } } m_sortedEigens = Utils.sort(m_eigenvalues); m_sumOfEigenValues = Utils.sum(m_eigenvalues); m_transformedFormat = setOutputFormat(); if (m_transBackToOriginal) { m_originalSpaceFormat = setOutputFormatOriginal(); // new ordered eigenvector matrix int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes() : m_transformedFormat.numAttributes() - 1; double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1]; // try converting back to the original space for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) { for (int j = 0; j < m_numAttribs; j++) { orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]]; } } // transpose the matrix int nr = orderedVectors.length; int nc = orderedVectors[0].length; m_eTranspose = new double[nc][nr]; for (int i = 0; i < nc; i++) { for (int j = 0; j < nr; j++) { m_eTranspose[i][j] = orderedVectors[j][i]; } } } }
From source file:BaggingImprove.java
/** * Bagging method.//www.j a va 2 s . com * * @param data the training data to be used for generating the bagged * classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); //data.deleteWithMissingClass(); super.buildClassifier(data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } //+ System.out.println("Classifier length" + m_Classifiers.length); int bagSize = data.numInstances() * m_BagSizePercent / 100; //+ System.out.println("Bag Size " + bagSize); Random random = new Random(m_Seed); boolean[][] inBag = null; if (m_CalcOutOfBag) { inBag = new boolean[m_Classifiers.length][]; } //+ //inisialisasi nama penamaan model BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt")); for (int j = 0; j < m_Classifiers.length; j++) { Instances bagData = null; // create the in-bag dataset if (m_CalcOutOfBag) { inBag[j] = new boolean[data.numInstances()]; //System.out.println("Inbag1 " + inBag[0][1]); //bagData = resampleWithWeights(data, random, inBag[j]); bagData = data.resampleWithWeights(random, inBag[j]); //System.out.println("num after resample " + bagData.numInstances()); //+ // for (int k = 0; k < bagData.numInstances(); k++) { // System.out.println("Bag Data after resample [calc out bag]" + bagData.instance(k)); // } } else { //+ System.out.println("Not m_Calc out of bag"); System.out.println("Please configure code inside!"); bagData = data.resampleWithWeights(random); if (bagSize < data.numInstances()) { bagData.randomize(random); Instances newBagData = new Instances(bagData, 0, bagSize); bagData = newBagData; } } if (m_Classifier instanceof Randomizable) { //+ System.out.println("Randomizable"); 
((Randomizable) m_Classifiers[j]).setSeed(random.nextInt()); } //write bootstrap into file writer.write("Bootstrap " + j); writer.newLine(); writer.write(bagData.toString()); writer.newLine(); System.out.println("Berhasil menyimpan bootstrap ke file "); System.out.println("Bootstrap " + j + 1); // textarea.append("\nBootsrap " + (j + 1)); //System.out.println("num instance kedua kali "+bagData.numInstances()); for (int b = 1; b < bagData.numInstances(); b++) { System.out.println("" + bagData.instance(b)); // textarea.append("\n" + bagData.instance(b)); } // //+ // build the classifier m_Classifiers[j].buildClassifier(bagData); // //+ // // SerializationHelper serialization = new SerializationHelper(); // serialization.write("KnnData"+model+".model", m_Classifiers[j]); // System.out.println("Finish write into model"); // model++; } writer.flush(); writer.close(); // calc OOB error? if (getCalcOutOfBag()) { double outOfBagCount = 0.0; double errorSum = 0.0; boolean numeric = data.classAttribute().isNumeric(); for (int i = 0; i < data.numInstances(); i++) { double vote; double[] votes; if (numeric) { votes = new double[1]; } else { votes = new double[data.numClasses()]; } // determine predictions for instance int voteCount = 0; for (int j = 0; j < m_Classifiers.length; j++) { if (inBag[j][i]) { continue; } voteCount++; // double pred = m_Classifiers[j].classifyInstance(data.instance(i)); if (numeric) { // votes[0] += pred; votes[0] = m_Classifiers[j].classifyInstance(data.instance(i)); } else { // votes[(int) pred]++; double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i)); //- // for(double a : newProbs) // { // System.out.println("Double new probs %.f "+a); // } // average the probability estimates for (int k = 0; k < newProbs.length; k++) { votes[k] += newProbs[k]; } } } System.out.println("Vote count %d" + voteCount); // "vote" if (numeric) { vote = votes[0]; if (voteCount > 0) { vote /= voteCount; // average } } else { if 
(Utils.eq(Utils.sum(votes), 0)) { } else { Utils.normalize(votes); } vote = Utils.maxIndex(votes); // predicted class //- System.out.println("Vote " + vote); } // error for instance outOfBagCount += data.instance(i).weight(); if (numeric) { errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight(); } else if (vote != data.instance(i).classValue()) { //+ System.out.println("Vote terakhir" + data.instance(i).classValue()); errorSum += data.instance(i).weight(); } } m_OutOfBagError = errorSum / outOfBagCount; } else { m_OutOfBagError = 0; } }
From source file:BaggingImprove.java
/** * Calculates the class membership probabilities for the given test * instance./*from w w w . j ava 2s . c o m*/ * * @param instance the instance to be classified * @return predicted class probability distribution * @throws Exception if distribution can't be computed successfully */ @Override public double[] distributionForInstance(Instance instance) throws Exception { double[] sums = new double[instance.numClasses()], newProbs; //- //System.out.println("\nDistribution For Instance\n"); for (int i = 0; i < m_NumIterations; i++) { if (instance.classAttribute().isNumeric() == true) { //System.out.println(m_Classifiers[i].classifyInstance(instance)); sums[0] += m_Classifiers[i].classifyInstance(instance); } else { //System.out.println(m_Classifiers[i].distributionForInstance(instance)); newProbs = m_Classifiers[i].distributionForInstance(instance); //- // for (int j = 0; j < newProbs.length; j++) { // sums[j] += newProbs[j]; // System.out.println("Sums "+sums[j]); // } //+ } } if (instance.classAttribute().isNumeric() == true) { sums[0] /= m_NumIterations; return sums; } else if (Utils.eq(Utils.sum(sums), 0)) { return sums; } else { Utils.normalize(sums); return sums; } }
From source file:MultiClassClassifier.java
License:Open Source License
/** * Returns the distribution for an instance. * * @param inst the instance to get the distribution for * @return the distribution/* w w w. j a va2s .c o m*/ * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance inst) throws Exception { if (m_Classifiers.length == 1) { return m_Classifiers[0].distributionForInstance(inst); } double[] probs = new double[inst.numClasses()]; if (m_Method == METHOD_1_AGAINST_1) { double[][] r = new double[inst.numClasses()][inst.numClasses()]; double[][] n = new double[inst.numClasses()][inst.numClasses()]; for (int i = 0; i < m_ClassFilters.length; i++) { if (m_Classifiers[i] != null) { Instance tempInst = (Instance) inst.copy(); tempInst.setDataset(m_TwoClassDataset); double[] current = m_Classifiers[i].distributionForInstance(tempInst); Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices()); range.setUpper(m_ClassAttribute.numValues()); int[] pair = range.getSelection(); if (m_pairwiseCoupling && inst.numClasses() > 2) { r[pair[0]][pair[1]] = current[0]; n[pair[0]][pair[1]] = m_SumOfWeights[i]; } else { if (current[0] > current[1]) { probs[pair[0]] += 1.0; } else { probs[pair[1]] += 1.0; } } } } if (m_pairwiseCoupling && inst.numClasses() > 2) { return pairwiseCoupling(n, r); } } else { // error correcting style methods for (int i = 0; i < m_ClassFilters.length; i++) { m_ClassFilters[i].input(inst); m_ClassFilters[i].batchFinished(); double[] current = m_Classifiers[i].distributionForInstance(m_ClassFilters[i].output()); //Calibrate the binary classifier scores for (int j = 0; j < m_ClassAttribute.numValues(); j++) { if (((MakeIndicator) m_ClassFilters[i]).getValueRange().isInRange(j)) { probs[j] += current[1]; } else { probs[j] += current[0]; } } } } if (Utils.gr(Utils.sum(probs), 0)) { Utils.normalize(probs); return probs; } else { return m_ZeroR.distributionForInstance(inst); } }
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/**
 * Returns a string describing this clusterer.
 *
 * <p>The text contains the number of iterations, the summed entries of
 * {@code m_squaredErrors} and a table with one row per attribute, a
 * "Full Data" column and one column per cluster. When {@code m_displayStdDevs}
 * is set, extra rows with standard deviations (numeric attributes) or value
 * counts and percentages (nominal attributes) are added.
 *
 * @return a description of the clusterer as a string, or
 *         "No clusterer built yet!" if no centroids exist
 */
@Override
public String toString() {
    if (m_ClusterCentroids == null) {
        return "No clusterer built yet!";
    }

    // maxWidth: width of a value column; maxAttWidth: width of the attribute-name column
    int maxWidth = 0;
    int maxAttWidth = 0;
    boolean containsNumeric = false;
    for (int i = 0; i < m_NumClusters; i++) {
        for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
            if (m_ClusterCentroids.attribute(j).name().length() > maxAttWidth) {
                maxAttWidth = m_ClusterCentroids.attribute(j).name().length();
            }
            if (m_ClusterCentroids.attribute(j).isNumeric()) {
                containsNumeric = true;
                // base-10 logarithm of the magnitude, used as an estimate of the digit count
                double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) / Math.log(10.0);
                if (width < 0) {
                    width = 1;
                }
                // decimal + # decimal places + 1
                width += 6.0;
                if ((int) width > maxWidth) {
                    maxWidth = (int) width;
                }
            }
        }
    }

    // nominal attributes: the labels shown in the table must fit as well
    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            Attribute a = m_ClusterCentroids.attribute(i);
            for (int j = 0; j < m_ClusterCentroids.numInstances(); j++) {
                String val = a.value((int) m_ClusterCentroids.instance(j).value(i));
                if (val.length() > maxWidth) {
                    maxWidth = val.length();
                }
            }
            for (int j = 0; j < a.numValues(); j++) {
                String val = a.value(j) + " ";
                if (val.length() > maxAttWidth) {
                    maxAttWidth = val.length();
                }
            }
        }
    }

    if (m_displayStdDevs) {
        // check for maximum width of maximum frequency count
        for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                int maxV = Utils.maxIndex(m_FullNominalCounts[i]);
                int percent = 6; // max percent width (100%)
                String nomV = "" + m_FullNominalCounts[i][maxV];
                // NOTE(review): the test includes "percent" but the assignment
                // does not, so this can make maxWidth smaller - confirm intent.
                if (nomV.length() + percent > maxWidth) {
                    maxWidth = nomV.length() + 1;
                }
            }
        }
    }

    // check for size of cluster sizes
    for (int m_ClusterSize : m_ClusterSizes) {
        String size = "(" + m_ClusterSize + ")";
        if (size.length() > maxWidth) {
            maxWidth = size.length();
        }
    }

    if (m_displayStdDevs && maxAttWidth < "missing".length()) {
        maxAttWidth = "missing".length();
    }

    String plusMinus = "+/-";
    maxAttWidth += 2;
    if (m_displayStdDevs && containsNumeric) {
        // leave room for the "+/-" prefix of the standard deviations
        maxWidth += plusMinus.length();
    }
    if (maxAttWidth < "Attribute".length() + 2) {
        maxAttWidth = "Attribute".length() + 2;
    }

    if (maxWidth < "Full Data".length()) {
        maxWidth = "Full Data".length() + 1;
    }

    if (maxWidth < "missing".length()) {
        maxWidth = "missing".length() + 1;
    }

    StringBuffer temp = new StringBuffer();
    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + m_Iterations + "\n");

    // The label of the error line depends on the concrete distance function in use.
    if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir
            || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir
            || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir
            || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir) {
        temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));
    } else {
        temp.append("Sum of within cluster distances: " + Utils.sum(m_squaredErrors));
    }

    if (!m_dontReplaceMissing) {
        temp.append("\nMissing values globally replaced with mean/mode");
    }

    // --- table header ---
    // presumably pad(text, padChar, count, flag) adds "count" pad characters on
    // the side selected by the flag - verify against pad()
    temp.append("\n\nCluster centroids:\n");
    temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true));

    temp.append("\n");
    temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));

    temp.append(pad("Full Data", " ", maxWidth + 1 - "Full Data".length(), true));

    // cluster numbers
    for (int i = 0; i < m_NumClusters; i++) {
        String clustNum = "" + i;
        temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true));
    }
    temp.append("\n");

    // cluster sizes (first the total over all clusters, then one entry per cluster)
    String cSize = "(" + Utils.sum(m_ClusterSizes) + ")";
    temp.append(pad(cSize, " ", maxAttWidth + maxWidth + 1 - cSize.length(), true));
    for (int i = 0; i < m_NumClusters; i++) {
        cSize = "(" + m_ClusterSizes[i] + ")";
        temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true));
    }
    temp.append("\n");

    // separator line spanning the attribute column and all value columns
    temp.append(pad("", "=", maxAttWidth + (maxWidth * (m_ClusterCentroids.numInstances() + 1)
            + m_ClusterCentroids.numInstances() + 1), true));
    temp.append("\n");

    // --- one row (plus optional detail rows) per attribute ---
    for (int i = 0; i < m_ClusterCentroids.numAttributes(); i++) {
        String attName = m_ClusterCentroids.attribute(i).name();
        temp.append(attName);
        for (int j = 0; j < maxAttWidth - attName.length(); j++) {
            temp.append(" ");
        }

        String strVal;
        String valMeanMode;
        // full data
        if (m_ClusterCentroids.attribute(i).isNominal()) {
            if (m_FullMeansOrMediansOrModes[i] == -1) { // missing
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = m_ClusterCentroids.attribute(i).value((int) m_FullMeansOrMediansOrModes[i])),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        } else {
            if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
            } else {
                valMeanMode = pad(
                        (strVal = Utils.doubleToString(m_FullMeansOrMediansOrModes[i], maxWidth, 4).trim()),
                        " ", maxWidth + 1 - strVal.length(), true);
            }
        }
        temp.append(valMeanMode);

        // one value per cluster centroid
        for (int j = 0; j < m_NumClusters; j++) {
            if (m_ClusterCentroids.attribute(i).isNominal()) {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad(
                            (strVal = m_ClusterCentroids.attribute(i)
                                    .value((int) m_ClusterCentroids.instance(j).value(i))),
                            " ", maxWidth + 1 - strVal.length(), true);
                }
            } else {
                if (m_ClusterCentroids.instance(j).isMissing(i)) {
                    valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true);
                } else {
                    valMeanMode = pad((strVal = Utils
                            .doubleToString(m_ClusterCentroids.instance(j).value(i), maxWidth, 4).trim()), " ",
                            maxWidth + 1 - strVal.length(), true);
                }
            }
            temp.append(valMeanMode);
        }
        temp.append("\n");

        if (m_displayStdDevs) {
            // Std devs/max nominal
            String stdDevVal = "";

            if (m_ClusterCentroids.attribute(i).isNominal()) {
                // Do the values of the nominal attribute
                Attribute a = m_ClusterCentroids.attribute(i);
                for (int j = 0; j < a.numValues(); j++) {
                    // full data
                    String val = " " + a.value(j);
                    temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false));
                    int count = m_FullNominalCounts[i][j];
                    // share of this value relative to the summed cluster sizes
                    int percent = (int) ((double) m_FullNominalCounts[i][j] / Utils.sum(m_ClusterSizes) * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterNominalCounts[k][i][j];
                        percent = (int) ((double) m_ClusterNominalCounts[k][i][j] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }
                    temp.append("\n");
                }
                // missing (if any)
                if (m_FullMissingCounts[i] > 0) {
                    // Full data
                    temp.append(pad(" missing", " ", maxAttWidth + 1 - " missing".length(), false));
                    int count = m_FullMissingCounts[i];
                    int percent = (int) ((double) m_FullMissingCounts[i] / Utils.sum(m_ClusterSizes) * 100.0);
                    String percentS = "" + percent + "%)";
                    percentS = pad(percentS, " ", 5 - percentS.length(), true);
                    stdDevVal = "" + count + " (" + percentS;
                    stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                    temp.append(stdDevVal);

                    // Clusters
                    for (int k = 0; k < m_NumClusters; k++) {
                        count = m_ClusterMissingCounts[k][i];
                        percent = (int) ((double) m_ClusterMissingCounts[k][i] / m_ClusterSizes[k] * 100.0);
                        percentS = "" + percent + "%)";
                        percentS = pad(percentS, " ", 5 - percentS.length(), true);
                        stdDevVal = "" + count + " (" + percentS;
                        stdDevVal = pad(stdDevVal, " ", maxWidth + 1 - stdDevVal.length(), true);
                        temp.append(stdDevVal);
                    }

                    temp.append("\n");
                }

                temp.append("\n");
            } else {
                // Full data
                if (Double.isNaN(m_FullMeansOrMediansOrModes[i])) {
                    stdDevVal = pad("--", " ", maxAttWidth + maxWidth + 1 - 2, true);
                } else {
                    stdDevVal = pad(
                            (strVal = plusMinus + Utils.doubleToString(m_FullStdDevs[i], maxWidth, 4).trim()),
                            " ", maxWidth + maxAttWidth + 1 - strVal.length(), true);
                }
                temp.append(stdDevVal);

                // Clusters
                for (int j = 0; j < m_NumClusters; j++) {
                    if (m_ClusterCentroids.instance(j).isMissing(i)) {
                        stdDevVal = pad("--", " ", maxWidth + 1 - 2, true);
                    } else {
                        stdDevVal = pad((strVal = plusMinus + Utils
                                .doubleToString(m_ClusterStdDevs.instance(j).value(i), maxWidth, 4).trim()), " ",
                                maxWidth + 1 - strVal.length(), true);
                    }
                    temp.append(stdDevVal);
                }
                temp.append("\n\n");
            }
        }
    }

    temp.append("\n\n");
    return temp.toString();
}
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/**
 * Reports the error accumulated over all clusters.
 *
 * @return the sum of the entries of {@code m_squaredErrors}
 */
public double getSquaredError() {
    final double total = Utils.sum(m_squaredErrors);
    return total;
}
From source file:aw_cluster.myKMeans.java
@Override public String toString() { if (centroid == null) { return "No clusterer built yet!"; }//w w w .j a va 2 s . c om int maxWidth = 0; int maxAttWidth = 0; boolean containsNumeric = false; for (int i = 0; i < numCluster; i++) { for (int j = 0; j < centroid.numAttributes(); j++) { if (centroid.attribute(j).name().length() > maxAttWidth) { maxAttWidth = centroid.attribute(j).name().length(); } if (centroid.attribute(j).isNumeric()) { containsNumeric = true; double width = Math.log(Math.abs(centroid.instance(i).value(j))) / Math.log(10.0); if (width < 0) { width = 1; } width += 6.0; if ((int) width > maxWidth) { maxWidth = (int) width; } } } } for (int i = 0; i < centroid.numAttributes(); i++) { if (centroid.attribute(i).isNominal()) { Attribute a = centroid.attribute(i); for (int j = 0; j < centroid.numInstances(); j++) { String val = a.value((int) centroid.instance(j).value(i)); if (val.length() > maxWidth) { maxWidth = val.length(); } } for (int j = 0; j < a.numValues(); j++) { String val = a.value(j) + " "; if (val.length() > maxAttWidth) { maxAttWidth = val.length(); } } } } // check for size of cluster sizes for (int i = 0; i < sizeEachCluster.length; i++) { String size = "(" + sizeEachCluster[i] + ")"; if (size.length() > maxWidth) { maxWidth = size.length(); } } String plusMinus = "+/-"; maxAttWidth += 2; if (maxAttWidth < "Attribute".length() + 2) { maxAttWidth = "Attribute".length() + 2; } if (maxWidth < "Full Data".length()) { maxWidth = "Full Data".length() + 1; } if (maxWidth < "missing".length()) { maxWidth = "missing".length() + 1; } StringBuffer temp = new StringBuffer(); temp.append("\nkMeans\n======\n"); temp.append("\nNumber of iterations: " + numIteration + "\n"); if (distanceFunction instanceof EuclideanDistance) { temp.append("Within cluster sum of squared errors: " + Utils.sum(squaredError)); } else { temp.append("Sum of within cluster distances: " + Utils.sum(squaredError)); } temp.append("\n\nCluster centroid:\n"); 
temp.append(pad("Cluster#", " ", (maxAttWidth + (maxWidth * 2 + 2)) - "Cluster#".length(), true)); temp.append("\n"); temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false)); // cluster numbers for (int i = 0; i < numCluster; i++) { String clustNum = "" + i; temp.append(pad(clustNum, " ", maxWidth + 1 - clustNum.length(), true)); } temp.append("\n"); // cluster sizes String cSize = ""; temp.append(pad(cSize, " ", maxAttWidth - cSize.length(), true)); for (int i = 0; i < numCluster; i++) { cSize = "(" + sizeEachCluster[i] + ")"; temp.append(pad(cSize, " ", maxWidth + 1 - cSize.length(), true)); } temp.append("\n"); temp.append( pad("", "=", maxAttWidth + (maxWidth * (centroid.numInstances()) + centroid.numInstances()), true)); temp.append("\n"); for (int i = 0; i < centroid.numAttributes(); i++) { String attName = centroid.attribute(i).name(); temp.append(attName); for (int j = 0; j < maxAttWidth - attName.length(); j++) { temp.append(" "); } String strVal; String valMeanMode; for (int j = 0; j < numCluster; j++) { if (centroid.attribute(i).isNominal()) { if (centroid.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad( (strVal = centroid.attribute(i).value((int) centroid.instance(j).value(i))), " ", maxWidth + 1 - strVal.length(), true); } } else { if (centroid.instance(j).isMissing(i)) { valMeanMode = pad("missing", " ", maxWidth + 1 - "missing".length(), true); } else { valMeanMode = pad( (strVal = Utils.doubleToString(centroid.instance(j).value(i), maxWidth, 4).trim()), " ", maxWidth + 1 - strVal.length(), true); } } temp.append(valMeanMode); } temp.append("\n"); } temp.append("\n\n"); return temp.toString(); }
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/** * Generates the classifier./* w w w . j a va2 s .c o m*/ * * @param instances set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances instances) throws Exception { double bestVal = Double.MAX_VALUE, currVal; double bestPoint = -Double.MAX_VALUE; int bestAtt = -1, numClasses; // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); // only class? -> build ZeroR model if (instances.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(instances); return; } else { m_ZeroR = null; } double[][] bestDist = new double[3][instances.numClasses()]; m_Instances = new Instances(instances); if (m_Instances.classAttribute().isNominal()) { numClasses = m_Instances.numClasses(); } else { numClasses = 1; } // For each attribute boolean first = true; for (int i = 0; i < m_Instances.numAttributes(); i++) { if (i != m_Instances.classIndex()) { // Reserve space for distribution. m_Distribution = new double[3][numClasses]; // Compute value of criterion for best split on attribute if (m_Instances.attribute(i).isNominal()) { currVal = findSplitNominal(i); } else { currVal = findSplitNumeric(i); } if ((first) || (currVal < bestVal)) { bestVal = currVal; bestAtt = i; bestPoint = m_SplitPoint; for (int j = 0; j < 3; j++) { System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, numClasses); } } // First attribute has been investigated first = false; } } // Set attribute, split point and distribution. 
m_AttIndex = bestAtt; m_SplitPoint = bestPoint; m_Distribution = bestDist; if (m_Instances.classAttribute().isNominal()) { for (int i = 0; i < m_Distribution.length; i++) { double sumCounts = Utils.sum(m_Distribution[i]); if (sumCounts == 0) { // This means there were only missing attribute values System.arraycopy(m_Distribution[2], 0, m_Distribution[i], 0, m_Distribution[2].length); Utils.normalize(m_Distribution[i]); } else { Utils.normalize(m_Distribution[i], sumCounts); } } } // Save memory m_Instances = new Instances(m_Instances, 0); }