List of usage examples for the weka.core.Utils.normalize method
public static void normalize(double[] doubles, double sum)
From source file:Classifiers.BRkNN.java
License:Open Source License
/** * Calculates the confidences of the labels, based on the neighboring * instances/*w ww. j av a 2 s.co m*/ * * @param neighbours * the list of nearest neighboring instances * @param distances * the distances of the neighbors * @return the confidences of the labels */ private double[] getConfidences(Instances neighbours, double[] distances) { double total, weight; double neighborLabels = 0; double[] confidences = new double[numLabels]; // Set up a correction to the estimator for (int i = 0; i < numLabels; i++) { confidences[i] = 1.0 / Math.max(1, train.numInstances()); } total = (double) numLabels / Math.max(1, train.numInstances()); for (int i = 0; i < neighbours.numInstances(); i++) { // Collect class counts Instance current = neighbours.instance(i); distances[i] = distances[i] * distances[i]; distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels)); weight = 1.0; weight *= current.weight(); for (int j = 0; j < numLabels; j++) { double value = Double.parseDouble( current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j]))); if (Utils.eq(value, 1.0)) { confidences[j] += weight; neighborLabels += weight; } } total += weight; } avgPredictedLabels = (int) Math.round(neighborLabels / total); // Normalise distribution if (total > 0) { Utils.normalize(confidences, total); } return confidences; }
From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java
License:Open Source License
/**
 * Creates a new dataset of the same size using random sampling with
 * replacement according to the given weight vector. The weights of the
 * instances in the new dataset are set to one. The length of the weight
 * vector has to be the same as the number of instances in the dataset, and
 * all weights have to be positive.
 *
 * @param random
 *            a random number generator
 * @param weights
 *            the weight vector
 * @return the new dataset
 * @throws IllegalArgumentException
 *             if the weights array is of the wrong length or contains
 *             negative weights.
 */
public ProcessInstances resampleWithWeights(Random random, double[] weights) {
    if (weights.length != numInstances()) {
        throw new IllegalArgumentException("weights.length != numInstances.");
    }
    ProcessInstances newData = new ProcessInstances(this, numInstances());
    if (numInstances() == 0) {
        return newData;
    }
    // Cumulative sums of uniform draws form sorted random "arrival points";
    // Utils.normalize rescales them to span [0, sumOfWeights].
    double[] probabilities = new double[numInstances()];
    double sumProbs = 0, sumOfWeights = Utils.sum(weights);
    for (int i = 0; i < numInstances(); i++) {
        sumProbs += random.nextDouble();
        probabilities[i] = sumProbs;
    }
    Utils.normalize(probabilities, sumProbs / sumOfWeights);

    // Make sure that rounding errors don't mess things up
    probabilities[numInstances() - 1] = sumOfWeights;
    int k = 0;
    int l = 0;
    sumProbs = 0;
    // Merge-style walk: advance through the cumulative weight intervals (l)
    // and copy instance l once for each arrival point (k) that falls inside
    // its interval; every copy is given weight 1.
    while ((k < numInstances() && (l < numInstances()))) {
        if (weights[l] < 0) {
            throw new IllegalArgumentException("Weights have to be positive.");
        }
        sumProbs += weights[l];
        while ((k < numInstances()) && (probabilities[k] <= sumProbs)) {
            newData.addInstance(getInstance(l));
            newData.getInstance(k).setWeight(1);
            k++;
        }
        l++;
    }
    return newData;
}
From source file:gyc.SMOTEBagging.java
License:Open Source License
/**
 * Creates a new dataset of the same size using random sampling
 * with replacement according to the instances' own weights. The
 * weights of the instances in the new dataset are set to one.
 *
 * @param data the data to be sampled from
 * @param random a random number generator
 * @param sampled indicating which instance has been sampled (set to
 *        true for every source instance that is copied at least once)
 * @return the new dataset
 * @throws IllegalArgumentException if the weights array is of the wrong
 *         length or contains negative weights.
 */
public final Instances resampleWithWeights(Instances data, Random random, boolean[] sampled) {

    // The sampling weights are taken from the instances themselves.
    double[] weights = new double[data.numInstances()];
    for (int i = 0; i < weights.length; i++) {
        weights[i] = data.instance(i).weight();
    }
    Instances newData = new Instances(data, data.numInstances());
    if (data.numInstances() == 0) {
        return newData;
    }
    // Cumulative sums of uniform draws form sorted random "arrival points";
    // Utils.normalize rescales them to span [0, sumOfWeights].
    double[] probabilities = new double[data.numInstances()];
    double sumProbs = 0, sumOfWeights = Utils.sum(weights);
    for (int i = 0; i < data.numInstances(); i++) {
        sumProbs += random.nextDouble();
        probabilities[i] = sumProbs;
    }
    Utils.normalize(probabilities, sumProbs / sumOfWeights);

    // Make sure that rounding errors don't mess things up
    probabilities[data.numInstances() - 1] = sumOfWeights;
    int k = 0;
    int l = 0;
    sumProbs = 0;
    // Merge-style walk: advance through the cumulative weight intervals (l)
    // and copy instance l once for each arrival point (k) that falls inside
    // its interval; every copy is given weight 1.
    while ((k < data.numInstances() && (l < data.numInstances()))) {
        if (weights[l] < 0) {
            throw new IllegalArgumentException("Weights have to be positive.");
        }
        sumProbs += weights[l];
        while ((k < data.numInstances()) && (probabilities[k] <= sumProbs)) {
            newData.add(data.instance(l));
            sampled[l] = true;
            newData.instance(k).setWeight(1);
            k++;
        }
        l++;
    }
    return newData;
}
From source file:milk.core.Exemplars.java
License:Open Source License
/**
 * Creates a new dataset of the same size using random sampling
 * with replacement according to the given weight vector. The
 * weights of the exemplars in the new dataset are set to one.
 * The length of the weight vector has to be the same as the
 * number of exemplars in the dataset, and all weights have to
 * be positive.
 *
 * @param random a random number generator
 * @param weights the weight vector
 * @return the new dataset
 * @exception Exception if the weights array is of the wrong
 *            length or contains negative weights or
 *            any other errors related to exemplars.
 */
public final Exemplars resampleWithWeights(Random random, double[] weights) throws Exception {

    int len = weights.length;
    if (len != numExemplars()) {
        throw new IllegalArgumentException("weights.length != numExemplars.");
    }
    Exemplars newData = new Exemplars(this, len);
    // Cumulative sums of uniform draws form sorted random "arrival points";
    // Utils.normalize rescales them to span [0, sumOfWeights].
    double[] probabilities = new double[len];
    double sumProbs = 0, sumOfWeights = Utils.sum(weights);
    for (int i = 0; i < len; i++) {
        sumProbs += random.nextDouble();
        probabilities[i] = sumProbs;
    }
    Utils.normalize(probabilities, sumProbs / sumOfWeights);

    // Make sure that rounding errors don't mess things up
    probabilities[len - 1] = sumOfWeights;
    int k = 0;
    int l = 0;
    sumProbs = 0;
    // Merge-style walk: advance through the cumulative weight intervals (l)
    // and copy exemplar l once for each arrival point (k) that falls inside
    // its interval; every copy is given weight 1.
    while ((k < len && (l < len))) {
        if (weights[l] < 0) {
            throw new IllegalArgumentException("Weights have to be positive.");
        }
        sumProbs += weights[l];
        while ((k < len) && (probabilities[k] <= sumProbs)) {
            newData.m_Exemplars.addElement(new Exemplar(exemplar(l)));
            newData.exemplar(k).setWeight(1.0);
            k++;
        }
        l++;
    }
    return newData;
}
From source file:mulan.classifier.lazy.BRkNN.java
License:Open Source License
/** * Calculates the confidences of the labels, based on the neighboring * instances//from w w w. j a va2 s . co m * * @param neighbours * the list of nearest neighboring instances * @param distances * the distances of the neighbors * @return the confidences of the labels */ private double[] getConfidences(Instances neighbours, double[] distances) { double total = 0, weight; double neighborLabels = 0; double[] confidences = new double[numLabels]; // Set up a correction to the estimator for (int i = 0; i < numLabels; i++) { confidences[i] = 1.0 / Math.max(1, train.numInstances()); } total = (double) numLabels / Math.max(1, train.numInstances()); for (int i = 0; i < neighbours.numInstances(); i++) { // Collect class counts Instance current = neighbours.instance(i); distances[i] = distances[i] * distances[i]; distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels)); switch (distanceWeighting) { case WEIGHT_INVERSE: weight = 1.0 / (distances[i] + 0.001); // to avoid division by // zero break; case WEIGHT_SIMILARITY: weight = 1.0 - distances[i]; break; default: // WEIGHT_NONE: weight = 1.0; break; } weight *= current.weight(); for (int j = 0; j < numLabels; j++) { double value = Double.parseDouble( current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j]))); if (Utils.eq(value, 1.0)) { confidences[j] += weight; neighborLabels += weight; } } total += weight; } avgPredictedLabels = (int) Math.round(neighborLabels / total); // Normalise distribution if (total > 0) { Utils.normalize(confidences, total); } return confidences; }
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
/** * Turn the list of nearest neighbors into a probability distribution. * * @param neighbours the list of nearest neighboring instances * @param distances the distances of the neighbors * @return the probability distribution//w w w .j a v a2 s.c o m * @throws Exception if computation goes wrong or has no class attribute */ protected double[] makeDistribution(Instances neighbours, double[] distances) throws Exception { double total = 0, weight; double[] distribution = new double[m_NumClasses]; // Set up a correction to the estimator if (m_ClassType == Attribute.NOMINAL) { for (int i = 0; i < m_NumClasses; i++) { distribution[i] = 1.0 / Math.max(1, m_Train.numInstances()); } total = (double) m_NumClasses / Math.max(1, m_Train.numInstances()); } for (int i = 0; i < neighbours.numInstances(); i++) { // Collect class counts Instance current = neighbours.instance(i); distances[i] = distances[i] * distances[i]; distances[i] = Math.sqrt(distances[i] / m_NumAttributesUsed); switch (m_DistanceWeighting) { case WEIGHT_INVERSE: weight = 1.0 / (distances[i] + 0.001); // to avoid div by zero break; case WEIGHT_SIMILARITY: weight = 1.0 - distances[i]; break; default: // WEIGHT_NONE: weight = 1.0; break; } weight *= current.weight(); try { switch (m_ClassType) { case Attribute.NOMINAL: distribution[(int) current.classValue()] += weight; break; case Attribute.NUMERIC: distribution[0] += current.classValue() * weight; break; } } catch (Exception ex) { throw new Error("Data has no class attribute!"); } total += weight; } // Normalise distribution if (total > 0) { Utils.normalize(distribution, total); } return distribution; }
From source file:tr.gov.ulakbim.jDenetX.streams.generators.multilabel.MetaMultilabelGenerator.java
License:Open Source License
/**
 * GenSkew: generates a label skew vector scaled to the desired label
 * cardinality z.
 *
 * @param r random generator (NOTE(review): not read here — values are
 *          drawn from m_MetaRandom instead; confirm this is intended)
 * @param z desired label cardinality
 * @return a vector of m_N skew values summing to z (NaN slots patched)
 */
private double[] fillSkew(Random r, double z) {
    double[] skew = new double[m_N];
    for (int i = 0; i < m_N; i++) {
        // Skewed mode draws random values; otherwise all labels are equal.
        skew[i] = (skewOption.getValue() >= 1) ? m_MetaRandom.nextDouble() : 1.0;
    }

    // Rescale so the entries sum to z.
    Utils.normalize(skew, Utils.sum(skew) / z);

    // Guard against NaN entries (produced when the pre-normalisation sum is 0).
    for (int i = 0; i < m_N; i++) {
        if (Double.isNaN(skew[i])) {
            skew[i] = 0.01;
        }
    }
    return skew;
}