Example usage for weka.core Instance classValue

List of usage examples for weka.core Instance classValue

Introduction

On this page you can find usage examples for weka.core Instance classValue.

Prototype

public double classValue();

Document

Returns an instance's class value as a floating-point number.
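
Because Weka encodes nominal class labels as zero-based indices, the returned double is usually cast to int and mapped back to a label string through the class attribute. A minimal sketch, where the ARFF file path and last-attribute class position are purely illustrative:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);  // assume the class is the last attribute

        Instance first = data.instance(0);
        double raw = first.classValue();               // nominal classes are returned as an index
        String label = data.classAttribute().value((int) raw);
        System.out.println(raw + " -> " + label);
    }
}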

Usage

From source file:moa.classifiers.rules.RuleClassifier.java

License:Apache License

public double computeAnomalySupervised(Rule rl, int ruleIndex, Instance inst) { // Supervised variant (per-class statistics)
    ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>();
    ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>();
    double D = 0.0;
    double N = 0.0;
    if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue()
            && this.anomalyDetectionOption.isSet()) {
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            if (!inst.isMissing(x)) {
                ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>();
                if (inst.attribute(x).isNumeric()) { //Numeric Attributes
                    if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) {
                        double mean = computeMean(
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double sd = computeSD(
                                (double) rl.squaredAttributeStatisticsSupervised.get(x)
                                        .get((int) inst.classValue()),
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double probability = computeProbability(mean, sd, inst.value(x));
                        if (probability != 0.0) {
                            D = D + Math.log(probability);
                            if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                                N = N + Math.log(probability);
                                AttribAnomalyStatisticTemp.add((double) x);
                                AttribAnomalyStatisticTemp.add(inst.value(x));
                                AttribAnomalyStatisticTemp.add(mean);
                                AttribAnomalyStatisticTemp.add(sd);
                                AttribAnomalyStatisticTemp.add(probability);
                                AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                            }
                        }
                    }
                } else { //Nominal
                    double attribVal = inst.value(x); //Attribute value
                    double classVal = inst.classValue(); // Class value
                    double probability = rl.observers.get(x).probabilityOfAttributeValueGivenClass(attribVal,
                            (int) classVal);
                    if (probability != 0.0) {
                        D = D + Math.log(probability);
                        if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                            N = N + Math.log(probability);
                            AttribAnomalyStatisticTemp.add((double) x);
                            AttribAnomalyStatisticTemp.add(inst.value(x));
                            AttribAnomalyStatisticTemp.add(probability);
                            AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                        }
                    }
                }
            }
        }
    }
    double anomaly = 0.0;
    if (D != 0) {
        anomaly = Math.abs(N / D);
    }
    if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) {
        caseAnomalyTemp.add(this.numInstance);
        double val = anomaly * 100;
        caseAnomalyTemp.add((int) val);
        this.caseAnomalySupervised.add(caseAnomalyTemp);
        Rule y = new Rule(this.ruleSet.get(ruleIndex));
        this.ruleSetAnomaliesSupervised.add(y);
        this.ruleAnomaliesIndexSupervised.add(ruleIndex + 1);
        this.ruleAttribAnomalyStatisticsSupervised.add(AttribAnomalyStatisticTemp2);
    }
    return anomaly;
}
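
The returned score is |N/D|: D accumulates the log-probability of every observed attribute value, while N accumulates only the values below the probability threshold, so scores near 1 mean the instance's evidence is dominated by unlikely attribute values. The computeProbability helper is not shown above; for numeric attributes it presumably evaluates a Gaussian model built from the per-class mean and standard deviation. A hedged sketch of such a helper (an assumption, not MOA's actual implementation):

// Sketch only: probability of a value under N(mean, sd^2).
// MOA's computeProbability may differ (e.g., normalization or smoothing).
static double computeProbability(double mean, double sd, double value) {
    if (sd <= 0.0) {
        return (Math.abs(value - mean) < 1e-9) ? 1.0 : 0.0; // degenerate distribution
    }
    double diff = value - mean;
    return Math.exp(-(diff * diff) / (2.0 * sd * sd)) / (sd * Math.sqrt(2.0 * Math.PI));
}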

From source file:moa.classifiers.SingleClassifierDrift.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    int trueClass = (int) inst.classValue();
    boolean prediction = Utils.maxIndex(this.classifier.getVotesForInstance(inst)) == trueClass;
    switch (this.driftDetectionMethod.computeNextVal(prediction)) {
    case DriftDetectionMethod.DDM_WARNING_LEVEL:
        //System.out.println("1 0 W");
        if (newClassifierReset) {
            this.newclassifier.resetLearning();
            newClassifierReset = false;
        }
        this.newclassifier.trainOnInstance(inst);
        break;

    case DriftDetectionMethod.DDM_OUTCONTROL_LEVEL:
        //System.out.println("0 1 O");
        this.classifier = null;
        this.classifier = this.newclassifier;
        if (this.classifier instanceof WEKAClassifier) {
            ((WEKAClassifier) this.classifier).buildClassifier();
        }
        this.newclassifier = (Classifier) getPreparedClassOption(this.baseLearnerOption);
        this.newclassifier.resetLearning();
        break;

    case DriftDetectionMethod.DDM_INCONTROL_LEVEL:
        //System.out.println("0 0 I");
        newClassifierReset = true;
        break;
    default:
        //System.out.println("ERROR!");

    }

    this.classifier.trainOnInstance(inst);
}
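
The three levels implement the usual DDM pattern: at the warning level a background learner starts training, and at the out-of-control level it replaces the primary classifier. The detector itself is not shown; below is a compact sketch of the DDM error-rate test in the style of Gama et al., where the class and field names are illustrative rather than MOA's DriftDetectionMethod API:

// Sketch of the DDM test: track the online error rate p and its std dev s;
// warn when p + s exceeds the best-seen pMin + 2*sMin, signal drift at 3*sMin.
class SimpleDDM {
    static final int IN_CONTROL = 0, WARNING = 1, OUT_OF_CONTROL = 2;
    private double n = 1, p = 1, s = 0;
    private double pMin = Double.MAX_VALUE, sMin = Double.MAX_VALUE;

    int update(boolean correctPrediction) {
        p += ((correctPrediction ? 0.0 : 1.0) - p) / n; // running error rate
        s = Math.sqrt(p * (1 - p) / n);
        n++;
        if (n < 30) return IN_CONTROL; // warm-up period
        if (p + s < pMin + sMin) { pMin = p; sMin = s; }
        if (p + s > pMin + 3 * sMin) { // drift: reset the detector
            n = 1; p = 1; s = 0;
            pMin = Double.MAX_VALUE; sMin = Double.MAX_VALUE;
            return OUT_OF_CONTROL;
        }
        if (p + s > pMin + 2 * sMin) return WARNING;
        return IN_CONTROL;
    }
}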

From source file:moa.classifiers.trees.ePTTD.java

License:Creative Commons License

@Override
public boolean correctlyClassifies(Instance inst) {
    return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst.classValue();
}
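
This predicate is typically used for prequential (test-then-train) evaluation over a stream. A brief sketch, assuming an older MOA API where stream.nextInstance() yields a weka.core.Instance directly; stream and learner are placeholders:

// Prequential accuracy: test on each instance before training on it.
int seen = 0, correct = 0;
while (stream.hasMoreInstances()) {
    Instance inst = stream.nextInstance();
    if (learner.correctlyClassifies(inst)) {
        correct++;
    }
    learner.trainOnInstance(inst);
    seen++;
}
System.out.println("Prequential accuracy: " + (100.0 * correct / seen) + "%");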

From source file:moa.cluster.Clustering.java

License:Apache License

public Clustering(List<? extends Instance> points) {
    HashMap<Integer, Integer> labelMap = classValues(points);
    int dim = points.get(0).dataset().numAttributes() - 1;

    int numClasses = labelMap.size();
    int noiseLabel;

    Attribute classLabel = points.get(0).dataset().classAttribute();
    int lastLabelIndex = classLabel.numValues() - 1;
    if ("noise".equals(classLabel.value(lastLabelIndex))) { // string contents must be compared with equals(), not ==
        noiseLabel = lastLabelIndex;
    } else {
        noiseLabel = -1;
    }

    ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++) {
        sorted_points[i] = new ArrayList<Instance>();
    }
    for (Instance point : points) {
        int clusterid = (int) point.classValue();
        if (clusterid == noiseLabel)
            continue;
        sorted_points[labelMap.get(clusterid)].add((Instance) point);
    }
    this.clusters = new AutoExpandVector<Cluster>();
    for (int i = 0; i < numClasses; i++) {
        if (sorted_points[i].size() > 0) {
            SphereCluster s = new SphereCluster(sorted_points[i], dim);
            s.setId(sorted_points[i].get(0).classValue());
            s.setGroundTruth(sorted_points[i].get(0).classValue());
            clusters.add(s);
        }
    }
}
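
The classValues helper is not shown; judging from its use, it maps each distinct class value to a dense index in [0, numClasses). A plausible sketch of such a helper (an assumption, not the MOA source; java.util imports are implied):

// Sketch: assign consecutive indices to the distinct class values seen in the data.
private static HashMap<Integer, Integer> classValues(List<? extends Instance> points) {
    HashMap<Integer, Integer> labelMap = new HashMap<>();
    int next = 0;
    for (Instance p : points) {
        int label = (int) p.classValue();
        if (!labelMap.containsKey(label)) {
            labelMap.put(label, next++);
        }
    }
    return labelMap;
}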

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Create a new cluster from an exemplar data point
 * @param x the exemplar instance that seeds the cluster
 */
public Riffle(Instance x) {
    safeInit(x);
    this.numLabeledPoints = (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] += x.weight();
    this.gtLabelFrequencies[(int) x.classValue()]++;
    for (int i = 0; (i < this.symbolFrequencies.length) && (i < x.numAttributes()); ++i) {
        double value = x.value(i);
        if (this.symbolFrequencies[i] == null) {
            if ((this.parentClusterer != null) && (this.parentClusterer.getUniverse() != null)) {
                this.variances[i] = this.parentClusterer.getUniverse().variances[i];
            } else {
                this.variances[i] = this.initialStandardDeviationOption.getValue();
            }
        } else {
            this.variances[i] = 1;
            this.symbolFrequencies[i][(int) value]++;
        }
    }
    this.numTotalPoints = 1;
    this.setGroundTruth(x.classValue());
    this.setCenter(x.toDoubleArray());
    this.setWeight(x.weight());
    this.setRadius(this.initialStandardDeviationOption.getValue());
    this.runningSumOfSquares = 0.0;
    this.setId(autoindex.getAndIncrement());
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Add a data point instance to this cluster
 *
 * @param x the instance to add to this cluster
 */
final public void addInstance(Instance x) {
    safeInit(x);
    this.numTotalPoints++;
    this.numLabeledPoints += (x.weight() > 0.9999) ? 1 : 0;
    this.labelFrequencies[(int) x.classValue()] += x.weight(); //non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]++; // For non-decision metrics only
    //Select strategy for on-line *-means (Any means)
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.addInstanceGrimson(x);
        break;
    case 1:
        this.addInstanceViaShephard(x);
        break;
    case 2:
        this.instances.add(x);
        return;
    default:
        System.err.println("Invalid addInstance strategy");
    }
    recompute();
}
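
The Grimson and Shephard routines selected above are incremental mean/variance update schemes and are not reproduced here. As a stand-in, Welford's algorithm is the textbook way to maintain a running mean and variance in one pass, sketched for a single dimension:

// Welford's online update: numerically stable running mean and variance.
class OnlineStats {
    private long n = 0;
    private double mean = 0.0;
    private double m2 = 0.0; // running sum of squared deviations from the mean

    void add(double x) {
        n++;
        double delta = x - mean;
        mean += delta / n;        // update the running mean
        m2 += delta * (x - mean); // uses both the old and the new mean
    }

    double getMean() { return mean; }
    double getSampleVariance() { return (n > 1) ? m2 / (n - 1) : 0.0; }
}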

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Inverse process of adding an instance
 *
 * @param x the instance to remove from this cluster
 */
final public void removeInstance(Instance x) {
    safeInit(x);
    this.numLabeledPoints -= (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] -= x.weight(); //non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]--; // For non-decision metrics only
    this.numTotalPoints--;

    //Select strategy for on-line *-means
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.removeInstanceGrimson(x);
        break;
    case 1:
        this.removeInstanceViaShephard(x);
        break;
    case 2:
        this.instances.remove(x);
        return;
    default:
        System.err.println("Invalid removeInstance strategy");
    }
    recompute();
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Set pre-computed information fields from the stored instances
 * @return the cluster radius multiplied by its entropy
 */
public final double recomputeAll() {
    if (this.instances != null) {
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // Accumulate label frequencies, centroid sums, and symbol counts
                if (x == null) {
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            } // for

            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid); // temporary - start with standard gaussian, gets updated below
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = (this.symbolFrequencies[i] == null) ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if(!instances.empty)
        recompute();
    } // end if(!instances null)
    return getRadius() * getEntropy();
}
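
The variance loop above is the compensated two-pass estimator: with per-point deltas d_i = centroid - x_i, the per-dimension sample variance is (sum(d_i^2) - (sum(d_i))^2 / n) / (n - 1), where the second term corrects for the centroid not being exactly the sample mean. The same pattern recurs in batchCluster and initialize below. As a standalone sketch:

// Compensated two-pass sample variance, matching the loop in recomputeAll().
static double compensatedVariance(double[] xs, double centroid) {
    double n = xs.length, cep = 0.0, sumSq = 0.0;
    for (double x : xs) {
        double delta = centroid - x;
        cep += delta;           // compensation term; nonzero when centroid != sample mean
        sumSq += delta * delta; // raw sum of squared deltas
    }
    return (sumSq - cep * cep / n) / (n - 1);
}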

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Wrapper for parallel K-Means used to process the warm-up data set
 * @param D warm-up data set
 * @param K number of clusters
 * @param useLabels if true, seed and grow clusters within per-label pools
 * @return the initial set of K clusters
 */
protected Set<Riffle> batchCluster(List<Instance> D, int K, boolean useLabels) {
    assert K >= 2 : "Minimum number of clusters (K) is 2";
    int numAttributes = D.get(0).numAttributes();
    TreeSet<Riffle> ret = new TreeSet<>();
    TreeSet<Integer> labels = new TreeSet<>();
    TreeMap<Integer, TreeSet<Riffle>> potentialClusters = new TreeMap<>();
    // Create a pool of candidate clusters, separated into per-label pools when useLabels is true:
    for (Instance x : D) {
        int label = (useLabels) ? (int) x.classValue() : 0;
        labels.add(label);
        TreeSet<Riffle> clusterSet = potentialClusters.get(label);
        if (clusterSet == null) {
            clusterSet = new TreeSet<>();
        }
        clusterSet.add(this.createNewCluster(x));
        potentialClusters.put(label, clusterSet);
    }

    // Initialize following the K-Means++ approach:
    Riffle C = potentialClusters.firstEntry().getValue().first();
    ret.add(C);
    potentialClusters.firstEntry().getValue().remove(C);

    Iterator<Integer> labelIter = labels.iterator();
    while ((ret.size() < K) && !potentialClusters.isEmpty()) {
        if (!labelIter.hasNext()) {
            labelIter = labels.iterator();
        } // loop around as needed
        int pseudoLabel = labelIter.next();
        TreeSet<Riffle> clusterSet = potentialClusters.get(pseudoLabel);
        if (clusterSet.isEmpty()) {
            potentialClusters.remove(pseudoLabel);
            labelIter.remove();
            continue;
        }
        SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(clusterSet, C.toInstance());
        C = nearestClusters.last().getCluster();
        ret.add(C);
        clusterSet.remove(C);
    }
    potentialClusters.clear();

    // Iterate expectation/maximization steps until the validity index converges
    final int maxIterations = 100;
    final double minDelta = 0.0001;
    int iteration = 0;
    double valIdxDelta = 1.0;
    ValIdxTupleType lastScore = null;
    while ((iteration < maxIterations) && (valIdxDelta > minDelta)) {
        iteration++;
        ret.parallelStream().forEach((c) -> {
            c.cleanTallies();
            if (c.instances == null) {
                c.instances = c.getHeader();
            }
            c.instances.clear();
        });

        // Expectation Step
        boolean wasAdded;
        for (Instance x : D) {
            SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(ret, x);
            wasAdded = false;
            int xLabel = (int) x.classValue();
            int cLabel = 0;
            if (useLabels) {
                // Add to nearest cluster with same label
                for (NearestClusterTuple nct : nearestClusters) {
                    cLabel = (int) nct.getCluster().getGroundTruth();
                    if (cLabel == xLabel) {
                        nct.getCluster().addInstance(x);
                        nct.getCluster().instances.add(x);
                        wasAdded = true;
                        //break;
                    }
                }
            }
            // just add to the closest cluster
            if (!wasAdded) {
                nearestClusters.last().getCluster().instances.add(x);
            }
        }

        // Maximization Step
        for (Riffle c : ret) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[numAttributes];
            double[] clusterVariance = new double[numAttributes];
            for (Instance x : c.instances) { // Accumulate the cluster centroid from its member points
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85; // Statistical Variance
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[numAttributes];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid); // temporary - start with standard gaussian, gets updated below
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such
            //                double[] clusterCentroid = new double[numAttributes];
            //                Arrays.fill(clusterCentroid, 0);
            //                for (Instance x : c.instances) { // Pre-populate univeral cluster with data points
            //                    double[] xValues = x.toDoubleArray();
            //                    for (int i = 0; i < xValues.length; ++i) {
            //                        clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
            //                    }
            //                }
            //                c.setCenter(clusterCentroid);
        }

        ValIdxTupleType currentScore = new ValIdxTupleType(ret);
        if (lastScore != null) {
            double diff = Math.abs(lastScore.getValIdx() - currentScore.getValIdx());
            double denominator = lastScore.getValIdx();
            valIdxDelta = (denominator == 0) ? 0.0 : Math.abs(diff / denominator);
        }
        lastScore = currentScore;
    } // end while
    return ret;
}
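
The seeding loop above is a label-aware heuristic in the spirit of K-Means++: it cycles round-robin over the class labels and draws each new seed from that label's candidate pool via findMostLikelyClusters. For reference, canonical K-Means++ samples each new center with probability proportional to its squared distance from the nearest already-chosen center; a compact sketch (java.util imports are implied):

// Canonical K-Means++ seeding: d^2-weighted sampling of new centers.
static List<double[]> kMeansPlusPlusSeeds(List<double[]> points, int k, Random rnd) {
    List<double[]> centers = new ArrayList<>();
    centers.add(points.get(rnd.nextInt(points.size()))); // first seed: uniform at random
    while (centers.size() < k) {
        double[] d2 = new double[points.size()];
        double total = 0.0;
        for (int i = 0; i < points.size(); i++) {
            double best = Double.MAX_VALUE; // squared distance to the nearest chosen center
            for (double[] c : centers) {
                double dist = 0.0;
                for (int j = 0; j < c.length; j++) {
                    double diff = points.get(i)[j] - c[j];
                    dist += diff * diff;
                }
                best = Math.min(best, dist);
            }
            total += (d2[i] = best);
        }
        double r = rnd.nextDouble() * total; // pick index i with probability d2[i] / total
        int pick = 0;
        while (pick < d2.length - 1 && (r -= d2[pick]) > 0) {
            pick++;
        }
        centers.add(points.get(pick));
    }
    return centers;
}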

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Uses methodology from Kim et al. "A Novel Validity Index for Determination of the Optimal Number of Clusters"
 * @param D warm-up data set
 */
public void initialize(List<Instance> D) {
    assert (D != null && !D.isEmpty() && D.get(0) != null) : "FeS2::initialize() called with a null or empty data list!";
    knownLabels.clear();
    universalProbabilitySums = 0;
    bestProbabilitySums = 0;
    bestProbabilityCount = 0;
    // Setup the universal set/cluster. Note that this will be crucial for subspace selection (cross-entropy checks against null hypothesis)
    double[] universalCentroid = new double[D.get(0).numAttributes()];
    double[] universalVariance = new double[D.get(0).numAttributes()];
    Arrays.fill(universalCentroid, 0);
    Arrays.fill(universalVariance, 0);
    universalCluster = new Riffle(D.get(0));
    universalCluster.updateStrategyOption.setChosenIndex(this.updateStrategyOption.getChosenIndex());
    universalCluster.outlierDefinitionStrategyOption
            .setChosenIndex(this.outlierDefinitionStrategyOption.getChosenIndex());
    universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
    universalCluster.initialStandardDeviationOption.setValue(this.initialStandardDeviationOption.getValue());
    universalCluster.alphaAdjustmentWeightOption.setValue(this.learningRateAlphaOption.getValue());
    //universalCluster.setParentClusterer(this);
    if (D.size() > 1) {
        double[] ep = new double[universalCentroid.length];
        Arrays.fill(ep, 0);
        universalCluster.setCenter(universalCentroid); // temporary - start with standard gaussian, gets updated below
        universalCluster.setVariances(universalVariance); // temporary - start with standard gaussian, will update below
        universalCluster.setWeight(0);
        double N = D.size();
        for (Instance x : D) { // Pre-populate universal cluster with data points
            knownLabels.add((int) x.classValue());
            universalCluster.addInstance(x);
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                universalCentroid[i] += xValues[i];
            }
        }
        for (int i = 0; i < universalCentroid.length; ++i) {
            universalCentroid[i] /= N;
        }
        // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
        // we use the 2-Pass method for computing sample variance (per dimension)
        for (Instance x : D) {
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                double delta = universalCentroid[i] - xValues[i];
                ep[i] += delta;
                universalVariance[i] += delta * delta;
            }
        }
        for (int i = 0; i < universalVariance.length; ++i) {
            universalVariance[i] = (universalVariance[i] - ep[i] * ep[i] / N) / (N - 1);
        }
        universalCluster.setCenter(universalCentroid); // temporary - start with standard gaussian, gets updated below
        universalCluster.setVariances(universalVariance);
    }
    universalCluster.recompute(); // this updates entropies and such

    // Ok, now let's use K-Means to find the initial cluster set
    int Cmin = this.clustersPerLabelOption.getValue() * this.knownLabels.size();
    int Cmax = Cmin + 1;
    if (optimizeInitialClusterNumberOption.isSet()) {
        Cmin = this.minimumNumberOfClusterSizeOption.getValue();//Math.max(knownLabels.size(), 2);
        Cmax = Math.max(Cmin + 1, Math.min(this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
                this.maximumNumberOfClusterSizeOption.getValue()));
    }
    ArrayList<ValIdxTupleType> valIdxSet = new ArrayList<>(Cmax);
    Set<Riffle> V;
    // Create multiple hypothesis for best K choices:
    for (int c = Cmin; c < Cmax; c++) {
        V = batchCluster(D, c, true);
        ValIdxTupleType i = new ValIdxTupleType(V);
        valIdxSet.add(i);
        if (CVI == null) {
            CVI = i;
        } else {
            CVI.setVo_min(Math.min(i.getVo(), CVI.getVo_min()));
            CVI.setVo_max(Math.max(i.getVo(), CVI.getVo_max()));
            CVI.setVu_min(Math.min(i.getVu(), CVI.getVu_min()));
            CVI.setVu_max(Math.max(i.getVu(), CVI.getVu_max()));
        }
    }

    // Normalize all:
    valIdxSet.parallelStream().map((i) -> {
        i.setVo_min(CVI.getVo_min());
        return i;
    }).map((i) -> {
        i.setVo_max(CVI.getVo_max());
        return i;
    }).map((i) -> {
        i.setVu_min(CVI.getVu_min());
        return i;
    }).forEach((i) -> {
        i.setVu_max(CVI.getVu_max());
    });

    // Find the best K by finding the minimum score:
    valIdxSet.stream().filter((i) -> (i.getValIdx() < CVI.getValIdx())).forEach((i) -> {
        CVI = i;
    });

    BufferedWriter datawriter = null; // DEBUG
    BufferedWriter rawdatawriter = null; // DEBUG
    BufferedWriter clusterwriter = null; // DEBUG
    String filePrefix = "DEBUG-" + iso8601FormatString.format(new Date()); // DEBUG
    try { // DEBUG
        File warmupData = new File((filePrefix + "-first" + D.size() + ".csv")); // DEBUG
        File rawwarmupData = new File((filePrefix + "-raw" + D.size() + ".csv")); // DEBUG
        File clusterData = new File((filePrefix + "-clusters.csv")); // DEBUG
        datawriter = new BufferedWriter(new FileWriter(warmupData)); // DEBUG
        rawdatawriter = new BufferedWriter(new FileWriter(rawwarmupData)); // DEBUG
        clusterwriter = new BufferedWriter(new FileWriter(clusterData)); // DEBUG
        clusterwriter.write("id,s,w,r,e,p,y,c,v"); // DEBUG
        clusterwriter.newLine(); // DEBUG
        String csv = ""; // DEBUG
        int rowCount = 0; // DEBUG
        for (Instance x : D) { // DEBUG
            double[] dataArray = x.toDoubleArray(); // DEBUG
            for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) { // DEBUG
                csv += dataArray[dIdx] + ","; // DEBUG
            } // DEBUG
            csv += ++rowCount; // DEBUG
            rawdatawriter.write(csv); // DEBUG
            rawdatawriter.newLine(); // DEBUG
            csv = ""; // DEBUG
        } // DEBUG
        for (Double uvar : universalVariance) {
            csv += uvar + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine(); // DEBUG
        csv = "";
        for (Double umean : universalCentroid) {
            csv += umean + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine(); // DEBUG
        csv = "";
        rawdatawriter.flush();
        this.clusters.clear();
        for (Riffle c : CVI.getClustering()) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[universalCentroid.length];
            double[] clusterVariance = new double[universalVariance.length];
            for (Instance x : c.instances) { // Accumulate the cluster centroid from its member points
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85; // Statistical Variance
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[universalCentroid.length];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid); // temporary - start with standard gaussian, gets updated below
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such

            // WRITE DEBUG DATA

            for (Instance x : c.instances) {
                double[] dataArray = x.toDoubleArray();
                for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) {
                    csv += dataArray[dIdx] + ",";
                }
                csv += c.getId();
                datawriter.write(csv);
                datawriter.newLine();
                csv = "";
            }

            //              clusterwriter.write("id,w,r,e,p,y,c,v");
            if (Double.isNaN(c.getRadius())) {
                System.out.print("Bad radius");
            }
            clusterwriter.write(c.getId() + "," + c.size() + "," + c.getWeight() + "," + c.getRadius() + ","
                    + c.getEntropy() + "," + c.getTruePurity() + "," + weka.core.Utils.maxIndex(c.getVotes())
                    + ",Centroid:," + weka.core.Utils.arrayToString(c.getCenter()) + ",Var:,"
                    + weka.core.Utils.arrayToString(c.getVariances()));
            clusterwriter.newLine();
            // END DEBUG DATA

            this.clusters.add(c);
        }
        if (this.outlierDefinitionStrategyOption.getChosenIndex() == 1) {
            this.setupPerceptron();
            double outlierPerceptronTrainingError = this.trainPerceptron();
            System.out
                    .println("outlier detection Perceptron training error = " + outlierPerceptronTrainingError);
        }
        this.clusters.stream().forEach((c) -> {
            c.instances.clear();
        });
        this.newClusterCreateCalls = 0;
        System.out.println(
                "Starting with " + this.clusters.size() + " clusters and " + this.knownLabels + " labels.");

        clusterwriter.flush(); // DEBUG
        clusterwriter.close(); // DEBUG
        datawriter.flush(); // DEBUG
        datawriter.close(); // DEBUG
        rawdatawriter.flush(); // DEBUG
        rawdatawriter.close(); // DEBUG
    } catch (IOException e) {
        e.printStackTrace(); // debug file I/O failures should not be swallowed silently
    } // DEBUG
}