List of usage examples for weka.core Instance classValue
public double classValue();
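Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed source files) of what classValue() returns. The file name "iris.arff" is just a placeholder for any dataset Weka can load; the class index must be set before classValue() is called.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF/CSV file supported by Weka; "iris.arff" is a placeholder path.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1); // classValue() requires a class index to be set

        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            // classValue() returns the class as a double: the numeric value for a numeric
            // class attribute, or the 0-based label index for a nominal one.
            double y = inst.classValue();
            String label = data.classAttribute().isNominal()
                    ? data.classAttribute().value((int) y)
                    : String.valueOf(y);
            System.out.println("class value = " + y + ", label = " + label);
        }
    }
}

Most of the MOA examples below use the nominal form, casting the result with (int) inst.classValue() to index per-class statistics.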
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public double computeAnomalySupervised(Rule rl, int ruleIndex, Instance inst) {
    ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>();
    ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>();
    double D = 0.0;
    double N = 0.0;
    if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue() && this.anomalyDetectionOption.isSet()) {
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            if (!inst.isMissing(x)) {
                ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>();
                if (inst.attribute(x).isNumeric()) { // Numeric attributes
                    if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) {
                        double mean = computeMean(
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double sd = computeSD(
                                (double) rl.squaredAttributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double probability = computeProbability(mean, sd, inst.value(x));
                        if (probability != 0.0) {
                            D = D + Math.log(probability);
                            if (probability < this.probabilityThresholdOption.getValue()) { // 0.10
                                N = N + Math.log(probability);
                                AttribAnomalyStatisticTemp.add((double) x);
                                AttribAnomalyStatisticTemp.add(inst.value(x));
                                AttribAnomalyStatisticTemp.add(mean);
                                AttribAnomalyStatisticTemp.add(sd);
                                AttribAnomalyStatisticTemp.add(probability);
                                AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                            }
                        }
                    }
                } else { // Nominal attributes
                    double attribVal = inst.value(x);     // Attribute value
                    double classVal = inst.classValue();  // Class value
                    double probability = rl.observers.get(x)
                            .probabilityOfAttributeValueGivenClass(attribVal, (int) classVal);
                    if (probability != 0.0) {
                        D = D + Math.log(probability);
                        if (probability < this.probabilityThresholdOption.getValue()) { // 0.10
                            N = N + Math.log(probability);
                            AttribAnomalyStatisticTemp.add((double) x);
                            AttribAnomalyStatisticTemp.add(inst.value(x));
                            AttribAnomalyStatisticTemp.add(probability);
                            AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                        }
                    }
                }
            }
        }
    }
    double anomaly = 0.0;
    if (D != 0) {
        anomaly = Math.abs(N / D);
    }
    if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) {
        caseAnomalyTemp.add(this.numInstance);
        double val = anomaly * 100;
        caseAnomalyTemp.add((int) val);
        this.caseAnomalySupervised.add(caseAnomalyTemp);
        Rule y = new Rule(this.ruleSet.get(ruleIndex));
        this.ruleSetAnomaliesSupervised.add(y);
        this.ruleAnomaliesIndexSupervised.add(ruleIndex + 1);
        this.ruleAttribAnomalyStatisticsSupervised.add(AttribAnomalyStatisticTemp2);
    }
    return anomaly;
}
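The helpers computeMean, computeSD, and computeProbability are not part of this listing. A plausible reconstruction, assuming the rule keeps per-class running sums and sums of squares and scores attribute values with a Gaussian density, is sketched below; the shipped RuleClassifier implementation may differ in edge-case handling.

// Hypothetical reconstruction of the statistics helpers called above.
static double computeMean(double sum, int count) {
    return sum / count;
}

static double computeSD(double squaredSum, double sum, int count) {
    // Sample standard deviation from running sums: sqrt((sum(x^2) - (sum(x))^2/n) / (n - 1))
    return Math.sqrt((squaredSum - (sum * sum) / count) / (count - 1));
}

static double computeProbability(double mean, double sd, double value) {
    if (sd <= 0.0) {
        return 0.0;
    }
    double diff = value - mean;
    // Normal density of the attribute value under the per-class mean/sd estimate
    return (1.0 / (sd * Math.sqrt(2.0 * Math.PI))) * Math.exp(-(diff * diff) / (2.0 * sd * sd));
}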
From source file:moa.classifiers.SingleClassifierDrift.java
License:Open Source License
@Override
public void trainOnInstanceImpl(Instance inst) {
    int trueClass = (int) inst.classValue();
    boolean prediction;
    if (Utils.maxIndex(this.classifier.getVotesForInstance(inst)) == trueClass) {
        prediction = true;
    } else {
        prediction = false;
    }
    switch (this.driftDetectionMethod.computeNextVal(prediction)) {
    case DriftDetectionMethod.DDM_WARNING_LEVEL:
        //System.out.println("1 0 W");
        if (newClassifierReset == true) {
            this.newclassifier.resetLearning();
            newClassifierReset = false;
        }
        this.newclassifier.trainOnInstance(inst);
        break;
    case DriftDetectionMethod.DDM_OUTCONTROL_LEVEL:
        //System.out.println("0 1 O");
        this.classifier = null;
        this.classifier = this.newclassifier;
        if (this.classifier instanceof WEKAClassifier) {
            ((WEKAClassifier) this.classifier).buildClassifier();
        }
        this.newclassifier = (Classifier) getPreparedClassOption(this.baseLearnerOption);
        this.newclassifier.resetLearning();
        break;
    case DriftDetectionMethod.DDM_INCONTROL_LEVEL:
        //System.out.println("0 0 I");
        newClassifierReset = true;
        break;
    default:
        //System.out.println("ERROR!");
    }
    this.classifier.trainOnInstance(inst);
}
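The pattern above — compare the arg-max of getVotesForInstance against (int) inst.classValue() and feed the boolean into a drift detector — is the usual prequential (test-then-train) loop. A minimal sketch of that loop follows, using only calls that already appear in this listing; the stream and learner variables are placeholders for a MOA stream and classifier configured elsewhere, and the stream API shown is an assumption about the same MOA version these examples target.

// Minimal prequential-loop sketch; 'stream' and 'learner' are hypothetical placeholders.
int seen = 0, correct = 0;
while (stream.hasMoreInstances() && seen < 10000) {
    Instance inst = stream.nextInstance();
    int trueClass = (int) inst.classValue();  // ground-truth label index
    if (Utils.maxIndex(learner.getVotesForInstance(inst)) == trueClass) { // test first...
        correct++;
    }
    learner.trainOnInstance(inst);            // ...then train on the same instance
    seen++;
}
System.out.println("prequential accuracy = " + (double) correct / seen);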
From source file:moa.classifiers.trees.ePTTD.java
License:Creative Commons License
@Override
public boolean correctlyClassifies(Instance inst) {
    return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst.classValue();
}
From source file:moa.cluster.Clustering.java
License:Apache License
public Clustering(List<? extends Instance> points) {
    HashMap<Integer, Integer> labelMap = classValues(points);
    int dim = points.get(0).dataset().numAttributes() - 1;
    int numClasses = labelMap.size();
    int noiseLabel;
    Attribute classLabel = points.get(0).dataset().classAttribute();
    int lastLabelIndex = classLabel.numValues() - 1;
    if ("noise".equals(classLabel.value(lastLabelIndex))) {
        noiseLabel = lastLabelIndex;
    } else {
        noiseLabel = -1;
    }
    ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++) {
        sorted_points[i] = new ArrayList<Instance>();
    }
    for (Instance point : points) {
        int clusterid = (int) point.classValue();
        if (clusterid == noiseLabel) {
            continue;
        }
        sorted_points[labelMap.get(clusterid)].add((Instance) point);
    }
    this.clusters = new AutoExpandVector<Cluster>();
    for (int i = 0; i < numClasses; i++) {
        if (sorted_points[i].size() > 0) {
            SphereCluster s = new SphereCluster(sorted_points[i], dim);
            s.setId(sorted_points[i].get(0).classValue());
            s.setGroundTruth(sorted_points[i].get(0).classValue());
            clusters.add(s);
        }
    }
}
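The classValues(points) helper is not included in this listing. A plausible sketch, assuming it simply maps each distinct ground-truth class value in the data to a dense 0-based index (which is how the constructor above uses it to index sorted_points), is:

// Hypothetical reconstruction of the classValues(...) helper used above.
public static HashMap<Integer, Integer> classValues(List<? extends Instance> points) {
    HashMap<Integer, Integer> labelMap = new HashMap<Integer, Integer>();
    int nextIndex = 0;
    for (Instance point : points) {
        int label = (int) point.classValue();
        if (!labelMap.containsKey(label)) {
            labelMap.put(label, nextIndex++); // first time this class value is seen
        }
    }
    return labelMap;
}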
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Create a new cluster from an exemplar data point
 * @param x
 */
public Riffle(Instance x) {
    safeInit(x);
    this.numLabeledPoints = (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] += x.weight();
    this.gtLabelFrequencies[(int) x.classValue()]++;
    for (int i = 0; (i < this.symbolFrequencies.length) && (i < x.numAttributes()); ++i) {
        double value = x.value(i);
        if (this.symbolFrequencies[i] == null) {
            if ((this.parentClusterer != null) && (this.parentClusterer.getUniverse() != null)) {
                this.variances[i] = this.parentClusterer.getUniverse().variances[i];
            } else {
                this.variances[i] = this.initialStandardDeviationOption.getValue();
            }
        } else {
            this.variances[i] = 1;
            this.symbolFrequencies[i][(int) value]++;
        }
    }
    this.numTotalPoints = 1;
    this.setGroundTruth(x.classValue());
    this.setCenter(x.toDoubleArray());
    this.setWeight(x.weight());
    this.setRadius(this.initialStandardDeviationOption.getValue());
    this.runningSumOfSquares = 0.0;
    this.setId(autoindex.getAndIncrement());
}
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Add a data point instance to this cluster
 *
 * @param x
 */
final public void addInstance(Instance x) {
    safeInit(x);
    this.numTotalPoints++;
    this.numLabeledPoints += (x.weight() > 0.9999) ? 1 : 0;
    this.labelFrequencies[(int) x.classValue()] += x.weight(); // non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]++;           // For non-decision metrics only
    // Select strategy for on-line *-means (any means)
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.addInstanceGrimson(x);
        break;
    case 1:
        this.addInstanceViaShephard(x);
        break;
    case 2:
        this.instances.add(x);
        return;
    default:
        System.err.println("Invalid addInstance strategy");
    }
    recompute();
}
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Inverse process of adding an instance
 *
 * @param x
 */
final public void removeInstance(Instance x) {
    safeInit(x);
    this.numLabeledPoints -= (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] -= x.weight(); // non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]--;           // For non-decision metrics only
    this.numTotalPoints--;
    // Select strategy for on-line *-means
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.removeInstanceGrimson(x);
        break;
    case 1:
        this.removeInstanceViaShephard(x);
        break;
    case 2:
        this.instances.remove(x);
        return;
    default:
        System.err.println("Invalid removeInstance strategy");
    }
    recompute();
}
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Set pre-computed information fields
 * @return
 */
public final double recomputeAll() {
    if (this.instances != null) {
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // accumulate centroid, label and symbol tallies
                if (x == null) {
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            }
            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid);
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible,
            // so we use the 2-pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = (this.symbolFrequencies[i] == null) ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // statistical variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if (!instances.isEmpty())
        recompute();
    } // end if (instances != null)
    return getRadius() * getEntropy();
}
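The variance idiom above (and in the FeS2 examples below) is the compensated two-pass estimator: with delta_i = mean - x_i, the per-dimension sample variance is (sum(delta_i^2) - (sum(delta_i))^2 / n) / (n - 1), where the second term corrects for the fact that the mean used to form the deltas is itself only an estimate. A standalone sketch of the same computation for a single attribute, not taken from the MOA sources:

// Illustration of the compensated two-pass sample variance used in
// recomputeAll(), batchCluster(), and initialize(); 'values' is a placeholder array.
static double twoPassVariance(double[] values) {
    double n = values.length;
    double mean = 0.0;
    for (double v : values) {
        mean += v / n;                    // pass 1: estimate the mean
    }
    double sumDelta = 0.0, sumDeltaSq = 0.0;
    for (double v : values) {
        double delta = mean - v;          // pass 2: deviations from the estimated mean
        sumDelta += delta;
        sumDeltaSq += delta * delta;
    }
    // (sum(delta^2) - (sum(delta))^2 / n) / (n - 1); the correction term absorbs
    // rounding error in the mean (sum(delta) would be exactly zero otherwise).
    return (sumDeltaSq - sumDelta * sumDelta / n) / (n - 1);
}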
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * Wrapper for parallel K-Means for processing the warm-up data set
 * @param D Warm-up data set
 * @param K number of clusters
 * @param useLabels if true, keep candidate clusters separated by class label during seeding and assignment
 * @return
 */
protected Set<Riffle> batchCluster(List<Instance> D, int K, boolean useLabels) {
    assert K >= 2 : "Minimum number of clusters (K) is 2";
    int numAttributes = D.get(0).numAttributes();
    TreeSet<Riffle> ret = new TreeSet<>();
    TreeSet<Integer> labels = new TreeSet<>();
    TreeMap<Integer, TreeSet<Riffle>> potentialClusters = new TreeMap<>();
    // Create a potential cluster pool. Separate into per-label pools if useLabels is set to true:
    for (Instance x : D) {
        int label = (useLabels) ? (int) x.classValue() : 0;
        labels.add(label);
        TreeSet<Riffle> clusterSet = potentialClusters.get(label);
        if (clusterSet == null) {
            clusterSet = new TreeSet<>();
        }
        clusterSet.add(this.createNewCluster(x));
        potentialClusters.put(label, clusterSet);
    }
    // Initialize following the K-Means++ approach:
    Riffle C = potentialClusters.firstEntry().getValue().first();
    ret.add(C);
    potentialClusters.firstEntry().getValue().remove(C);
    Iterator<Integer> labelIter = labels.iterator();
    while ((ret.size() < K) && !potentialClusters.isEmpty()) {
        if (!labelIter.hasNext()) {
            labelIter = labels.iterator(); // loop around as needed
        }
        int pseudoLabel = labelIter.next();
        TreeSet<Riffle> clusterSet = potentialClusters.get(pseudoLabel);
        if (clusterSet.isEmpty()) {
            potentialClusters.remove(pseudoLabel);
            labelIter.remove();
            continue;
        }
        SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(clusterSet, C.toInstance());
        C = nearestClusters.last().getCluster();
        ret.add(C);
        clusterSet.remove(C);
    }
    potentialClusters.clear();
    // Iterate
    final int maxIterations = 100;
    final double minDelta = 0.0001;
    int iteration = 0;
    double valIdxDelta = 1.0;
    ValIdxTupleType lastScore = null;
    while ((iteration < maxIterations) && (valIdxDelta > minDelta)) {
        iteration++;
        ret.parallelStream().forEach((c) -> {
            c.cleanTallies();
            if (c.instances == null) {
                c.instances = c.getHeader();
            }
            c.instances.clear();
        });
        // Expectation step
        boolean wasAdded;
        for (Instance x : D) {
            SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(ret, x);
            wasAdded = false;
            int xLabel = (int) x.classValue();
            int cLabel = 0;
            if (useLabels) {
                // Add to nearest cluster with the same label
                for (NearestClusterTuple nct : nearestClusters) {
                    cLabel = (int) nct.getCluster().getGroundTruth();
                    if (cLabel == xLabel) {
                        nct.getCluster().addInstance(x);
                        nct.getCluster().instances.add(x);
                        wasAdded = true;
                        //break;
                    }
                }
            }
            // otherwise just add to the closest cluster
            if (!wasAdded) {
                nearestClusters.last().getCluster().instances.add(x);
            }
        }
        // Maximization step
        for (Riffle c : ret) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[numAttributes];
            double[] clusterVariance = new double[numAttributes];
            for (Instance x : c.instances) { // accumulate the per-dimension centroid
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible,
            // so we use the 2-pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85;
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[numAttributes];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // statistical variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid);
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such
        }
        ValIdxTupleType currentScore = new ValIdxTupleType(ret);
        if (lastScore != null) {
            double diff = Math.abs(lastScore.getValIdx() - currentScore.getValIdx());
            double denominator = lastScore.getValIdx();
            valIdxDelta = (denominator == 0) ? 0.0 : Math.abs(diff / denominator);
        }
        lastScore = currentScore;
    } // end while
    return ret;
}
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * Uses methodology from Kim et al. "A Novel Validity Index for Determination of the Optimal Number of Clusters"
 * @param D Warm-up data set
 */
public void initialize(List<Instance> D) {
    assert (D != null && !D.isEmpty() && D.get(0) != null) : "FeS::initialize() called with a null data list!";
    knownLabels.clear();
    universalProbabilitySums = 0;
    bestProbabilitySums = 0;
    bestProbabilityCount = 0;
    // Set up the universal set/cluster. Note that this will be crucial for subspace selection
    // (cross-entropy checks against the null hypothesis).
    double[] universalCentroid = new double[D.get(0).numAttributes()];
    double[] universalVariance = new double[D.get(0).numAttributes()];
    Arrays.fill(universalCentroid, 0);
    Arrays.fill(universalVariance, 0);
    universalCluster = new Riffle(D.get(0));
    universalCluster.updateStrategyOption.setChosenIndex(this.updateStrategyOption.getChosenIndex());
    universalCluster.outlierDefinitionStrategyOption.setChosenIndex(this.outlierDefinitionStrategyOption.getChosenIndex());
    universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
    universalCluster.initialStandardDeviationOption.setValue(this.initialStandardDeviationOption.getValue());
    universalCluster.alphaAdjustmentWeightOption.setValue(this.learningRateAlphaOption.getValue());
    //universalCluster.setParentClusterer(this);
    if (D.size() > 1) {
        double[] ep = new double[universalCentroid.length];
        Arrays.fill(ep, 0);
        universalCluster.setCenter(universalCentroid);     // temporary - starts as a standard gaussian, updated below
        universalCluster.setVariances(universalVariance);  // temporary - starts as a standard gaussian, updated below
        universalCluster.setWeight(0);
        double N = D.size();
        for (Instance x : D) { // Pre-populate universal cluster with data points
            knownLabels.add((int) x.classValue());
            universalCluster.addInstance(x);
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                universalCentroid[i] += xValues[i];
            }
        }
        for (int i = 0; i < universalCentroid.length; ++i) {
            universalCentroid[i] /= N;
        }
        // The cluster class uses an incremental heuristic, but we want to start out as pure as possible,
        // so we use the 2-pass method for computing sample variance (per dimension)
        for (Instance x : D) {
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                double delta = universalCentroid[i] - xValues[i];
                ep[i] += delta;
                universalVariance[i] += delta * delta;
            }
        }
        for (int i = 0; i < universalVariance.length; ++i) {
            universalVariance[i] = (universalVariance[i] - ep[i] * ep[i] / N) / (N - 1);
        }
        universalCluster.setCenter(universalCentroid);
        universalCluster.setVariances(universalVariance);
    }
    universalCluster.recompute(); // this updates entropies and such
    // Ok, now let's use K-Means to find the initial cluster set
    int Cmin = this.clustersPerLabelOption.getValue() * this.knownLabels.size();
    int Cmax = Cmin + 1;
    if (optimizeInitialClusterNumberOption.isSet()) {
        Cmin = this.minimumNumberOfClusterSizeOption.getValue(); //Math.max(knownLabels.size(), 2);
        Cmax = Math.max(Cmin + 1, Math.min(this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
                this.maximumNumberOfClusterSizeOption.getValue()));
    }
    ArrayList<ValIdxTupleType> valIdxSet = new ArrayList<>(Cmax);
    Set<Riffle> V;
    // Create multiple hypotheses for the best K choices:
    for (int c = Cmin; c < Cmax; c++) {
        V = batchCluster(D, c, true);
        ValIdxTupleType i = new ValIdxTupleType(V);
        valIdxSet.add(i);
        if (CVI == null) {
            CVI = i;
        } else {
            CVI.setVo_min(Math.min(i.getVo(), CVI.getVo_min()));
            CVI.setVo_max(Math.max(i.getVo(), CVI.getVo_max()));
            CVI.setVu_min(Math.min(i.getVu(), CVI.getVu_min()));
            CVI.setVu_max(Math.max(i.getVu(), CVI.getVu_max()));
        }
    }
    // Normalize all:
    valIdxSet.parallelStream().map((i) -> {
        i.setVo_min(CVI.getVo_min());
        return i;
    }).map((i) -> {
        i.setVo_max(CVI.getVo_max());
        return i;
    }).map((i) -> {
        i.setVu_min(CVI.getVu_min());
        return i;
    }).forEach((i) -> {
        i.setVu_max(CVI.getVu_max());
    });
    // Find the best K by finding the minimum score:
    valIdxSet.stream().filter((i) -> (i.getValIdx() < CVI.getValIdx())).forEach((i) -> {
        CVI = i;
    });
    BufferedWriter datawriter = null;    // DEBUG
    BufferedWriter rawdatawriter = null; // DEBUG
    BufferedWriter clusterwriter = null; // DEBUG
    String filePrefix = "DEBUG-" + iso8601FormatString.format(new Date()); // DEBUG
    try { // DEBUG
        File warmupData = new File((filePrefix + "-first" + D.size() + ".csv"));  // DEBUG
        File rawwarmupData = new File((filePrefix + "-raw" + D.size() + ".csv")); // DEBUG
        File clusterData = new File((filePrefix + "-clusters.csv"));              // DEBUG
        datawriter = new BufferedWriter(new FileWriter(warmupData));       // DEBUG
        rawdatawriter = new BufferedWriter(new FileWriter(rawwarmupData)); // DEBUG
        clusterwriter = new BufferedWriter(new FileWriter(clusterData));   // DEBUG
        clusterwriter.write("id,s,w,r,e,p,y,c,v"); // DEBUG
        clusterwriter.newLine();                   // DEBUG
        String csv = "";                           // DEBUG
        int rowCount = 0;                          // DEBUG
        for (Instance x : D) {                     // DEBUG
            double[] dataArray = x.toDoubleArray();
            for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) {
                csv += dataArray[dIdx] + ",";
            }
            csv += ++rowCount;
            rawdatawriter.write(csv);
            rawdatawriter.newLine();
            csv = "";
        } // DEBUG
        for (Double uvar : universalVariance) {
            csv += uvar + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine();  // DEBUG
        csv = "";
        for (Double umean : universalCentroid) {
            csv += umean + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine();  // DEBUG
        csv = "";
        rawdatawriter.flush();
        this.clusters.clear();
        for (Riffle c : CVI.getClustering()) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[universalCentroid.length];
            double[] clusterVariance = new double[universalVariance.length];
            for (Instance x : c.instances) { // accumulate the per-dimension centroid
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible,
            // so we use the 2-pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85;
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[universalCentroid.length];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // statistical variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid);
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such
            // WRITE DEBUG DATA
            for (Instance x : c.instances) {
                double[] dataArray = x.toDoubleArray();
                for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) {
                    csv += dataArray[dIdx] + ",";
                }
                csv += c.getId();
                datawriter.write(csv);
                datawriter.newLine();
                csv = "";
            }
            // clusterwriter.write("id,w,r,e,p,y,c,v");
            if (Double.isNaN(c.getRadius())) {
                System.out.print("Bad radius");
            }
            clusterwriter.write(c.getId() + "," + c.size() + "," + c.getWeight() + "," + c.getRadius() + ","
                    + c.getEntropy() + "," + c.getTruePurity() + "," + weka.core.Utils.maxIndex(c.getVotes())
                    + ",Centroid:," + weka.core.Utils.arrayToString(c.getCenter()) + ",Var:,"
                    + weka.core.Utils.arrayToString(c.getVariances()));
            clusterwriter.newLine();
            // END DEBUG DATA
            this.clusters.add(c);
        }
        if (this.outlierDefinitionStrategyOption.getChosenIndex() == 1) {
            this.setupPerceptron();
            double outlierPerceptronTrainingError = this.trainPerceptron();
            System.out.println("outlier detection Perceptron training error = " + outlierPerceptronTrainingError);
        }
        this.clusters.stream().forEach((c) -> {
            c.instances.clear();
        });
        this.newClusterCreateCalls = 0;
        System.out.println("Starting with " + this.clusters.size() + " clusters and " + this.knownLabels + " labels.");
        clusterwriter.flush(); // DEBUG
        clusterwriter.close(); // DEBUG
        datawriter.flush();    // DEBUG
        datawriter.close();    // DEBUG
        rawdatawriter.flush(); // DEBUG
        rawdatawriter.close(); // DEBUG
    } catch (IOException e) {
    } // DEBUG
}