List of usage examples for weka.core Utils minIndex
public static int minIndex(double[] doubles)
From source file:items.Sequence.java
License:Open Source License
/**
 * LB_distance - LB_Keogh-style lower-bound pruning between this sequence and
 * {@code a}. The exact (expensive) distance is only computed when the cheap
 * lower bound beats {@code longestdist}, the best distance found so far.
 *
 * NOTE(review): the envelope below builds U from positions k-4..k of S1 and L
 * from positions k..k+4 of S1 — confirm this asymmetry is intended; a standard
 * LB_Keogh envelope takes both max and min over the full window.
 *
 * @param a           the candidate sequence to compare against
 * @param longestdist the best (smallest) true distance seen so far
 * @return the updated best-so-far distance (unchanged when the bound prunes)
 */
public synchronized double LB_distance(Sequence a, double longestdist) {
    double best_so_far = longestdist;
    Sequence S1 = this;
    Sequence S2 = a;
    final int tailleS = S1.getNbTuples();
    final int tailleT = S2.getNbTuples();

    // Upper (U) and lower (L) envelopes of S1, window half-width 4.
    double[] U = new double[tailleS];
    double[] L = new double[tailleS];
    for (int k = 0; k < tailleS; k++) {
        int[] left = new int[5];
        int[] right = new int[5];
        for (int n = 0; n < 5; n++) {
            left[n] = Math.max(k - n, 0);               // clamp below at 0
            right[n] = Math.min(k + n, tailleS - 1);    // clamp above at end
        }
        double[] leftValues = new double[5];
        double[] rightValues = new double[5];
        for (int m = 0; m < 5; m++) {
            leftValues[m] = S1.sequence[left[m]].getValue();
            rightValues[m] = S1.sequence[right[m]].getValue();
        }
        U[k] = leftValues[Utils.maxIndex(leftValues)];
        L[k] = rightValues[Utils.minIndex(rightValues)];
    }

    // FIX(perf): the original rebuilt C inside the i-loop although it does not
    // depend on i, turning an O(n) pass into O(n^2). Build it once here.
    double[] C = new double[tailleT];
    for (int j = 0; j < tailleT; j++) {
        C[j] = S2.sequence[j].getValue();
    }

    // Per-position squared violation of S2 against S1's envelope.
    // NOTE(review): assumes tailleT <= tailleS, as in the original (dist, U
    // and L are indexed by i < tailleT but sized tailleS) — confirm sequences
    // are equal-length.
    double[] dist = new double[tailleS];
    for (int i = 0; i < tailleT; i++) {
        if (C[i] > U[i]) {
            dist[i] = Math.pow(C[i] - U[i], 2);
        } else if (C[i] < L[i]) {
            dist[i] = Math.pow(C[i] - L[i], 2);
        } else {
            dist[i] = 0.0;
        }
    }

    double LB_dist = sqrt(Utils.sum(dist));
    // Only pay for the true distance when the lower bound does not prune.
    if (LB_dist < best_so_far) {
        double true_dist = S1.distance(S2);
        if (true_dist < best_so_far) {
            best_so_far = true_dist;
        }
    }
    return best_so_far;
}
From source file:meka.classifiers.multilabel.incremental.meta.BaggingMLUpdateableADWIN.java
License:Open Source License
/** * DistributionForInstance - And Check for drift by measuring a type of error. */// w w w . j a v a 2 s . c o m @Override public double[] distributionForInstance(Instance x) throws Exception { // classification double y[] = new double[x.classIndex()]; for (int i = 0; i < m_NumIterations; i++) { double y_i[] = m_Classifiers[i].distributionForInstance(x); for (int j = 0; j < y_i.length; j++) { y[j] += y_i[j]; } accuracies[i] += error(y_i, MLUtils.toDoubleArray(x, y.length)); } for (int j = 0; j < y.length; j++) { y[j] = y[j] / m_NumIterations; } double d = error(y, MLUtils.toDoubleArray(x, y.length)); // ADWIN stuff double ErrEstim = this.adwin.getEstimation(); if (this.adwin.setInput(1.0 - d)) if (this.adwin.getEstimation() > ErrEstim) { // find worst classifier int index = Utils.minIndex(accuracies); if (getDebug()) System.out.println("------- CHANGE DETECTED / Reset Model #" + index + " ------- "); // reset this classifier m_Classifiers[index] = (ProblemTransformationMethod) AbstractClassifier.makeCopy(m_Classifier); m_Classifiers[index].buildClassifier(new Instances(m_InstancesTemplate)); // ... and reset ADWIN this.adwin = new ADWIN(); accuracies = new double[m_NumIterations]; } return y; }
From source file:milk.classifiers.MINND.java
License:Open Source License
/** * Pre-process the given exemplar according to the other exemplars * in the given exemplars. It also updates noise data statistics. * * @param data the whole exemplars/* w w w. java2s .c om*/ * @param pos the position of given exemplar in data * @return the processed exemplar * @exception if the returned exemplar is wrong */ public Exemplar preprocess(Exemplars data, int pos) throws Exception { Exemplar before = data.exemplar(pos); if ((int) before.classValue() == 0) { m_NoiseM[pos] = null; m_NoiseV[pos] = null; return before; } Exemplar after = new Exemplar(before, 0); Exemplar noises = new Exemplar(before, 0); for (int g = 0; g < before.getInstances().numInstances(); g++) { Instance datum = before.getInstances().instance(g); double[] dists = new double[data.numExemplars()]; for (int i = 0; i < data.numExemplars(); i++) { if (i != pos) dists[i] = distance(datum, m_Mean[i], m_Variance[i], i); else dists[i] = Double.POSITIVE_INFINITY; } int[] pred = new int[m_NumClasses]; for (int n = 0; n < pred.length; n++) pred[n] = 0; for (int o = 0; o < m_Select; o++) { int index = Utils.minIndex(dists); pred[(int) m_Class[index]]++; dists[index] = Double.POSITIVE_INFINITY; } int clas = Utils.maxIndex(pred); if ((int) datum.classValue() != clas) noises.add(datum); else after.add(datum); } if (Utils.gr(noises.getInstances().sumOfWeights(), 0)) { m_NoiseM[pos] = noises.meanOrMode(); m_NoiseV[pos] = noises.variance(); for (int y = 0; y < m_NoiseV[pos].length; y++) { if (Utils.eq(m_NoiseV[pos][y], 0.0)) m_NoiseV[pos][y] = m_ZERO; } } else { m_NoiseM[pos] = null; m_NoiseV[pos] = null; } return after; }
From source file:milk.classifiers.MINND.java
License:Open Source License
/** * Use Kullback Leibler distance to find the nearest neighbours of * the given exemplar./* w w w . ja v a 2 s.c om*/ * It also uses K-Nearest Neighbour algorithm to classify the * test exemplar * * @param ex the given test exemplar * @return the classification * @exception Exception if the exemplar could not be classified * successfully */ public double classifyExemplar(Exemplar e) throws Exception { Exemplar ex = new Exemplar(e); ex = scale(ex); double[] var = ex.variance(); // The Kullback distance to all exemplars double[] kullback = new double[m_Class.length]; // The first K nearest neighbours' predictions */ double[] predict = new double[m_NumClasses]; for (int h = 0; h < predict.length; h++) predict[h] = 0; ex = cleanse(ex); if (ex.getInstances().numInstances() == 0) { System.out.println("???Whole exemplar falls into ambiguous area!"); return 1.0; // Bias towards positive class } double[] mean = ex.meanOrMode(); // Avoid zero sigma for (int h = 0; h < var.length; h++) { if (Utils.eq(var[h], 0.0)) var[h] = m_ZERO; } for (int i = 0; i < m_Class.length; i++) { if (m_ValidM[i] != null) kullback[i] = kullback(mean, m_ValidM[i], var, m_Variance[i], i); else kullback[i] = Double.POSITIVE_INFINITY; } for (int j = 0; j < m_Neighbour; j++) { int pos = Utils.minIndex(kullback); predict[(int) m_Class[pos]] += m_Weights[pos]; kullback[pos] = Double.POSITIVE_INFINITY; } System.out.println("???There are still some unambiguous instances in this exemplar! Predicted as: " + Utils.maxIndex(predict)); return (double) Utils.maxIndex(predict); }
From source file:milk.classifiers.MINND.java
License:Open Source License
/**
 * Cleanse the given exemplar according to the valid and noise data
 * statistics: an instance is kept when its m_Choose smallest valid-side
 * distances dominate the noise-side ones.
 *
 * @param before the given exemplar
 * @return the processed exemplar
 * @exception if the returned exemplar is wrong
 */
public Exemplar cleanse(Exemplar before) throws Exception {
    Exemplar after = new Exemplar(before, 0);
    for (int g = 0; g < before.getInstances().numInstances(); g++) {
        Instance datum = before.getInstances().instance(g);
        // Distance to every exemplar's valid / noise statistics; missing
        // statistics count as infinitely far away.
        double[] vDist = new double[m_Mean.length];
        double[] nDist = new double[m_Mean.length];
        for (int h = 0; h < m_Mean.length; h++) {
            vDist[h] = (m_ValidM[h] == null) ? Double.POSITIVE_INFINITY
                    : distance(datum, m_ValidM[h], m_ValidV[h], h);
            nDist[h] = (m_NoiseM[h] == null) ? Double.POSITIVE_INFINITY
                    : distance(datum, m_NoiseM[h], m_NoiseV[h], h);
        }
        // Extract the m_Choose smallest distances of each kind, in order.
        double[] minValDists = new double[m_Choose];
        double[] minNoiDists = new double[m_Choose];
        for (int k = 0; k < m_Choose; k++) {
            int pos = Utils.minIndex(vDist);
            minValDists[k] = vDist[pos];
            vDist[pos] = Double.POSITIVE_INFINITY;
            pos = Utils.minIndex(nDist);
            minNoiDists[k] = nDist[pos];
            nDist[pos] = Double.POSITIVE_INFINITY;
        }
        // Merge the two sorted prefixes; whichever side is consumed at least
        // as often as the other wins the instance.
        int x = 0;
        int y = 0;
        while (x + y < m_Choose) {
            if (minValDists[x] <= minNoiDists[y]) {
                x++;
            } else {
                y++;
            }
        }
        if (x >= y) {
            after.add(datum);
        }
    }
    return after;
}
From source file:moa.classifiers.AccuracyWeightedEnsemble.java
License:Open Source License
/**
 * Removes the poorest classifier from the model, thus decreasing the models
 * size.
 *
 * @return the size of the removed classifier.
 */
protected int removePoorestModelBytes() {
    // The lowest-weighted member is the poorest performer.
    int poorest = Utils.minIndex(this.ensembleWeights);
    int freedBytes = this.ensemble[poorest].measureByteSize();
    discardModel(poorest);
    return freedBytes;
}
From source file:moa.classifiers.meta.WeightedMajorityAlgorithm.java
License:Open Source License
/**
 * Discards the ensemble member with the smallest weight and reports how much
 * memory that frees.
 *
 * @return the byte size of the discarded classifier.
 */
protected int removePoorestModelBytes() {
    int victim = Utils.minIndex(this.ensembleWeights);
    int reclaimed = this.ensemble[victim].measureByteSize();
    discardModel(victim);
    return reclaimed;
}
From source file:mulan.classifier.meta.ConstrainedKMeans.java
License:Open Source License
/**
 * Clusters an instance that has been through the filters, under the
 * constraint that no bucket (cluster) may hold more than bucketSize items.
 * The instance is inserted into its closest bucket; if that overflows, the
 * bucket's farthest item is evicted and re-assigned to its next-closest
 * bucket, repeating until every bucket is within capacity.
 *
 * @param instance the instance to assign a cluster to
 * @param updateErrors if true, update the within clusters sum of errors
 * @return a cluster number
 */
private int clusterProcessedInstance(Instance instance, boolean updateErrors) {
    // calculate distance from bucket centers
    double[] distance = new double[m_NumClusters];
    for (int i = 0; i < m_NumClusters; i++) {
        distance[i] = distance(instance, m_ClusterCentroids.instance(i));
    }
    // create a bucket item from the instance
    bucketInstance ci = new bucketInstance();
    ci.setDistances(distance);
    // assing item to closest bucket
    int bestCluster;
    boolean finished;
    do {
        finished = true;
        // The item currently being placed goes to its nearest bucket that has
        // not yet been ruled out (ruled-out buckets hold Double.MAX_VALUE).
        bestCluster = Utils.minIndex(distance);
        ci.setDistance(distance[bestCluster]);
        // Insertion sort: keep each bucket ordered so the farthest item sits
        // at the end and can be evicted cheaply on overflow.
        int j;
        for (j = 0; j < bucket[bestCluster].size()
                && ((bucketInstance) bucket[bestCluster].get(j)).compareTo(ci) < 0; j++) {
        }
        bucket[bestCluster].add(j, ci);
        if (bucket[bestCluster].size() > bucketSize) {
            // Overflow: evict the farthest item and loop again to re-place it,
            // with this bucket marked unavailable in its distance vector.
            ci = (bucketInstance) bucket[bestCluster].remove(bucket[bestCluster].size() - 1);
            distance = ci.getDistances();
            distance[bestCluster] = Double.MAX_VALUE;
            ci.setDistances(distance);
            finished = false;
        }
    } while (!finished);
    if (updateErrors) {
        // NOTE(review): distance may belong to the last evicted item here,
        // not the original instance — confirm this is the intended error term.
        m_squaredErrors[bestCluster] += distance[bestCluster];
    }
    return bestCluster;
}
From source file:mulan.classifier.meta.thresholding.OneThreshold.java
License:Open Source License
/**
 * Evaluates the performance of the learner on a data set according to a
 * bipartition measure for a range of thresholds.
 *
 * @param learner the learner whose confidences are thresholded
 * @param data the test data to evaluate different thresholds
 * @param measure the evaluation is based on this parameter
 * @param min the minimum threshold
 * @param step the step to increase threshold from min to max
 * @param max the maximum threshold
 * @return the optimal threshold
 * @throws Exception if prediction or measure copying fails
 */
private double computeThreshold(MultiLabelLearner learner, MultiLabelInstances data,
        BipartitionMeasureBase measure, double min, double step, double max) throws Exception {
    int numOfThresholds = (int) Math.rint((max - min) / step + 1);
    double[] performance = new double[numOfThresholds];
    // One independent measure accumulator per candidate threshold.
    BipartitionMeasureBase[] measureForThreshold = new BipartitionMeasureBase[numOfThresholds];
    for (int i = 0; i < numOfThresholds; i++) {
        measureForThreshold[i] = (BipartitionMeasureBase) measure.makeCopy();
        measureForThreshold[i].reset();
    }
    boolean[] thresholdHasProblem = new boolean[numOfThresholds];
    for (int j = 0; j < data.getNumInstances(); j++) {
        Instance instance = data.getDataSet().instance(j);
        if (data.hasMissingLabels(instance)) {
            continue;
        }
        MultiLabelOutput mlo = learner.makePrediction(instance);
        // Ground-truth bipartition of this instance.
        boolean[] trueLabels = new boolean[numLabels];
        for (int counter = 0; counter < numLabels; counter++) {
            int classIdx = labelIndices[counter];
            String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
            trueLabels[counter] = classValue.equals("1");
        }
        double[] confidences = mlo.getConfidences();
        // BUGFIX: iterate by index instead of accumulating the threshold with
        // `currentThreshold += step`; floating-point drift could either skip
        // the last threshold or push `counter` past numOfThresholds, causing
        // an ArrayIndexOutOfBoundsException on measureForThreshold[counter].
        for (int counter = 0; counter < numOfThresholds; counter++) {
            double currentThreshold = min + counter * step;
            boolean[] bipartition = new boolean[numLabels];
            for (int k = 0; k < numLabels; k++) {
                if (confidences[k] >= currentThreshold) {
                    bipartition[k] = true;
                }
            }
            try {
                MultiLabelOutput temp = new MultiLabelOutput(bipartition);
                measureForThreshold[counter].update(temp, trueLabels);
            } catch (MulanRuntimeException e) {
                // This threshold produced an invalid bipartition for the
                // measure; exclude it from the final selection.
                thresholdHasProblem[counter] = true;
            }
        }
    }
    // Score each threshold by its distance to the ideal measure value;
    // problematic thresholds can never win.
    for (int i = 0; i < numOfThresholds; i++) {
        if (!thresholdHasProblem[i]) {
            performance[i] = Math.abs(measure.getIdealValue() - measureForThreshold[i].getValue());
        } else {
            performance[i] = Double.MAX_VALUE;
        }
    }
    return min + Utils.minIndex(performance) * step;
}
From source file:mulan.classifier.meta.thresholding.RCut.java
License:Open Source License
/**
 * Automatically selects a threshold based on training set performance
 * evaluated using cross-validation.
 *
 * @param trainingData the training data
 * @param measure performance is evaluated based on this parameter
 * @param folds number of cross-validation folds
 * @throws InvalidDataFormatException
 * @throws Exception
 */
private void autoTuneThreshold(MultiLabelInstances trainingData, BipartitionMeasureBase measure, int folds)
        throws InvalidDataFormatException, Exception {
    if (folds < 2) {
        throw new IllegalArgumentException("folds should be more than 1");
    }
    // Deviation from the ideal measure value, accumulated across folds,
    // one slot per candidate rank cut (0..numLabels).
    double[] totalDiff = new double[numLabels + 1];
    LabelsMetaData labelsMetaData = trainingData.getLabelsMetaData();
    MultiLabelLearner tempLearner = foldLearner.makeCopy();
    for (int fold = 0; fold < folds; fold++) {
        MultiLabelInstances trainMulti = new MultiLabelInstances(
                trainingData.getDataSet().trainCV(folds, fold), labelsMetaData);
        MultiLabelInstances testMulti = new MultiLabelInstances(
                trainingData.getDataSet().testCV(folds, fold), labelsMetaData);
        tempLearner.build(trainMulti);
        double[] foldDiff = computeThreshold(tempLearner, testMulti, measure);
        for (int k = 0; k < foldDiff.length; k++) {
            totalDiff[k] += foldDiff[k];
        }
    }
    // The rank with the smallest accumulated deviation becomes the threshold.
    t = Utils.minIndex(totalDiff);
}