Example usage for weka.core Instance classValue

List of usage examples for weka.core Instance classValue

Introduction

On this page you can find usage examples for weka.core Instance classValue.

Prototype

public double classValue();

Document

Returns an instance's class value as a floating-point number.
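
Because Weka encodes nominal class labels as zero-based indices, the returned double is usually cast to int and mapped back to a label string through the class attribute. A minimal sketch, where the ARFF file path and last-attribute class position are purely illustrative:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassValueDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);  // assume the class is the last attribute

        Instance first = data.instance(0);
        double raw = first.classValue();               // nominal classes are returned as an index
        String label = data.classAttribute().value((int) raw);
        System.out.println(raw + " -> " + label);
    }
}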

Usage

From source file:moa.classifiers.rules.RuleClassifier.java

License:Apache License

public double computeAnomalySupervised(Rule rl, int ruleIndex, Instance inst) { // Supervised variant (per-class statistics)
    ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>();
    ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>();
    double D = 0.0;
    double N = 0.0;
    if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue()
            && this.anomalyDetectionOption.isSet()) {
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            if (!inst.isMissing(x)) {
                ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>();
                if (inst.attribute(x).isNumeric()) { //Numeric Attributes
                    if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) {
                        double mean = computeMean(
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double sd = computeSD(
                                (double) rl.squaredAttributeStatisticsSupervised.get(x)
                                        .get((int) inst.classValue()),
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double probability = computeProbability(mean, sd, inst.value(x));
                        if (probability != 0.0) {
                            D = D + Math.log(probability);
                            if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                                N = N + Math.log(probability);
                                AttribAnomalyStatisticTemp.add((double) x);
                                AttribAnomalyStatisticTemp.add(inst.value(x));
                                AttribAnomalyStatisticTemp.add(mean);
                                AttribAnomalyStatisticTemp.add(sd);
                                AttribAnomalyStatisticTemp.add(probability);
                                AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                            }
                        }
                    }
                } else { //Nominal
                    double attribVal = inst.value(x); //Attribute value
                    double classVal = inst.classValue(); // Class value
                    double probability = rl.observers.get(x).probabilityOfAttributeValueGivenClass(attribVal,
                            (int) classVal);
                    if (probability != 0.0) {
                        D = D + Math.log(probability);
                        if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                            N = N + Math.log(probability);
                            AttribAnomalyStatisticTemp.add((double) x);
                            AttribAnomalyStatisticTemp.add(inst.value(x));
                            AttribAnomalyStatisticTemp.add(probability);
                            AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                        }
                    }
                }
            }
        }
    }
    double anomaly = 0.0;
    if (D != 0) {
        anomaly = Math.abs(N / D);
    }
    if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) {
        caseAnomalyTemp.add(this.numInstance);
        double val = anomaly * 100;
        caseAnomalyTemp.add((int) val);
        this.caseAnomalySupervised.add(caseAnomalyTemp);
        Rule y = new Rule(this.ruleSet.get(ruleIndex));
        this.ruleSetAnomaliesSupervised.add(y);
        this.ruleAnomaliesIndexSupervised.add(ruleIndex + 1);
        this.ruleAttribAnomalyStatisticsSupervised.add(AttribAnomalyStatisticTemp2);
    }
    return anomaly;
}
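
The returned score is |N/D|: D accumulates the log-probability of every observed attribute value, while N accumulates only the values below the probability threshold, so scores near 1 mean the instance's evidence is dominated by unlikely attribute values. The computeProbability helper is not shown above; for numeric attributes it presumably evaluates a Gaussian model built from the per-class mean and standard deviation. A hedged sketch of such a helper (an assumption, not MOA's actual implementation):

// Sketch only: probability of a value under N(mean, sd^2).
// MOA's computeProbability may differ (e.g., normalization or smoothing).
static double computeProbability(double mean, double sd, double value) {
    if (sd <= 0.0) {
        return (Math.abs(value - mean) < 1e-9) ? 1.0 : 0.0; // degenerate distribution
    }
    double diff = value - mean;
    return Math.exp(-(diff * diff) / (2.0 * sd * sd)) / (sd * Math.sqrt(2.0 * Math.PI));
}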

From source file:moa.classifiers.SingleClassifierDrift.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    int trueClass = (int) inst.classValue();
    boolean prediction = Utils.maxIndex(this.classifier.getVotesForInstance(inst)) == trueClass;
    switch (this.driftDetectionMethod.computeNextVal(prediction)) {
    case DriftDetectionMethod.DDM_WARNING_LEVEL:
        //System.out.println("1 0 W");
        if (newClassifierReset) {
            this.newclassifier.resetLearning();
            newClassifierReset = false;
        }
        this.newclassifier.trainOnInstance(inst);
        break;

    case DriftDetectionMethod.DDM_OUTCONTROL_LEVEL:
        //System.out.println("0 1 O");
        this.classifier = null;
        this.classifier = this.newclassifier;
        if (this.classifier instanceof WEKAClassifier) {
            ((WEKAClassifier) this.classifier).buildClassifier();
        }
        this.newclassifier = (Classifier) getPreparedClassOption(this.baseLearnerOption);
        this.newclassifier.resetLearning();
        break;

    case DriftDetectionMethod.DDM_INCONTROL_LEVEL:
        //System.out.println("0 0 I");
        newClassifierReset = true;
        break;
    default:
        //System.out.println("ERROR!");

    }

    this.classifier.trainOnInstance(inst);
}
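
The three levels implement the usual DDM pattern: at the warning level a background learner starts training, and at the out-of-control level it replaces the primary classifier. The detector itself is not shown; below is a compact sketch of the DDM error-rate test in the style of Gama et al., where the class and field names are illustrative rather than MOA's DriftDetectionMethod API:

// Sketch of the DDM test: track the online error rate p and its std dev s;
// warn when p + s exceeds the best-seen pMin + 2*sMin, signal drift at 3*sMin.
class SimpleDDM {
    static final int IN_CONTROL = 0, WARNING = 1, OUT_OF_CONTROL = 2;
    private double n = 1, p = 1, s = 0;
    private double pMin = Double.MAX_VALUE, sMin = Double.MAX_VALUE;

    int update(boolean correctPrediction) {
        p += ((correctPrediction ? 0.0 : 1.0) - p) / n; // running error rate
        s = Math.sqrt(p * (1 - p) / n);
        n++;
        if (n < 30) return IN_CONTROL; // warm-up period
        if (p + s < pMin + sMin) { pMin = p; sMin = s; }
        if (p + s > pMin + 3 * sMin) { // drift: reset the detector
            n = 1; p = 1; s = 0;
            pMin = Double.MAX_VALUE; sMin = Double.MAX_VALUE;
            return OUT_OF_CONTROL;
        }
        if (p + s > pMin + 2 * sMin) return WARNING;
        return IN_CONTROL;
    }
}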

From source file:moa.classifiers.trees.ePTTD.java

License:Creative Commons License

@Override
public boolean correctlyClassifies(Instance inst) {
    return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst.classValue();
}
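
This predicate is typically used for prequential (test-then-train) evaluation over a stream. A brief sketch, assuming an older MOA API where stream.nextInstance() yields a weka.core.Instance directly; stream and learner are placeholders:

// Prequential accuracy: test on each instance before training on it.
int seen = 0, correct = 0;
while (stream.hasMoreInstances()) {
    Instance inst = stream.nextInstance();
    if (learner.correctlyClassifies(inst)) {
        correct++;
    }
    learner.trainOnInstance(inst);
    seen++;
}
System.out.println("Prequential accuracy: " + (100.0 * correct / seen) + "%");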

From source file:moa.cluster.Clustering.java

License:Apache License

public Clustering(List<? extends Instance> points) {
    HashMap<Integer, Integer> labelMap = classValues(points);
    int dim = points.get(0).dataset().numAttributes() - 1;

    int numClasses = labelMap.size();
    int noiseLabel;

    Attribute classLabel = points.get(0).dataset().classAttribute();
    int lastLabelIndex = classLabel.numValues() - 1;
    if ("noise".equals(classLabel.value(lastLabelIndex))) { // string contents must be compared with equals(), not ==
        noiseLabel = lastLabelIndex;
    } else {
        noiseLabel = -1;
    }

    ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++) {
        sorted_points[i] = new ArrayList<Instance>();
    }
    for (Instance point : points) {
        int clusterid = (int) point.classValue();
        if (clusterid == noiseLabel)
            continue;
        sorted_points[labelMap.get(clusterid)].add((Instance) point);
    }
    this.clusters = new AutoExpandVector<Cluster>();
    for (int i = 0; i < numClasses; i++) {
        if (sorted_points[i].size() > 0) {
            SphereCluster s = new SphereCluster(sorted_points[i], dim);
            s.setId(sorted_points[i].get(0).classValue());
            s.setGroundTruth(sorted_points[i].get(0).classValue());
            clusters.add(s);
        }
    }
}
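
The classValues helper is not shown; judging from its use, it maps each distinct class value to a dense index in [0, numClasses). A plausible sketch of such a helper (an assumption, not the MOA source; java.util imports are implied):

// Sketch: assign consecutive indices to the distinct class values seen in the data.
private static HashMap<Integer, Integer> classValues(List<? extends Instance> points) {
    HashMap<Integer, Integer> labelMap = new HashMap<>();
    int next = 0;
    for (Instance p : points) {
        int label = (int) p.classValue();
        if (!labelMap.containsKey(label)) {
            labelMap.put(label, next++);
        }
    }
    return labelMap;
}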

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Create a new cluster from an exemplar data point
 * @param x the exemplar instance that seeds the cluster
 */
public Riffle(Instance x) {
    safeInit(x);
    this.numLabeledPoints = (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] += x.weight();
    this.gtLabelFrequencies[(int) x.classValue()]++;
    for (int i = 0; (i < this.symbolFrequencies.length) && (i < x.numAttributes()); ++i) {
        double value = x.value(i);
        if (this.symbolFrequencies[i] == null) {
            if ((this.parentClusterer != null) && (this.parentClusterer.getUniverse() != null)) {
                this.variances[i] = this.parentClusterer.getUniverse().variances[i];
            } else {
                this.variances[i] = this.initialStandardDeviationOption.getValue();
            }
        } else {
            this.variances[i] = 1;
            this.symbolFrequencies[i][(int) value]++;
        }
    }
    this.numTotalPoints = 1;
    this.setGroundTruth(x.classValue());
    this.setCenter(x.toDoubleArray());
    this.setWeight(x.weight());
    this.setRadius(this.initialStandardDeviationOption.getValue());
    this.runningSumOfSquares = 0.0;
    this.setId(autoindex.getAndIncrement());
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Add a data point instance to this cluster
 *
 * @param x the instance to add to this cluster
 */
final public void addInstance(Instance x) {
    safeInit(x);
    this.numTotalPoints++;
    this.numLabeledPoints += (x.weight() > 0.9999) ? 1 : 0;
    this.labelFrequencies[(int) x.classValue()] += x.weight(); //non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]++; // For non-decision metrics only
    //Select strategy for on-line *-means (Any means)
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.addInstanceGrimson(x);
        break;
    case 1:
        this.addInstanceViaShephard(x);
        break;
    case 2:
        this.instances.add(x);
        return;
    default:
        System.err.println("Invalid addInstance strategy");
    }
    recompute();
}
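
The Grimson and Shephard routines selected above are incremental mean/variance update schemes and are not reproduced here. As a stand-in, Welford's algorithm is the textbook way to maintain a running mean and variance in one pass, sketched for a single dimension:

// Welford's online update: numerically stable running mean and variance.
class OnlineStats {
    private long n = 0;
    private double mean = 0.0;
    private double m2 = 0.0; // running sum of squared deviations from the mean

    void add(double x) {
        n++;
        double delta = x - mean;
        mean += delta / n;        // update the running mean
        m2 += delta * (x - mean); // uses both the old and the new mean
    }

    double getMean() { return mean; }
    double getSampleVariance() { return (n > 1) ? m2 / (n - 1) : 0.0; }
}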

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Inverse process of adding an instance
 *
 * @param x the instance to remove from this cluster
 */
final public void removeInstance(Instance x) {
    safeInit(x);
    this.numLabeledPoints -= (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] -= x.weight(); //non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]--; // For non-decision metrics only
    this.numTotalPoints--;

    //Select strategy for on-line *-means
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.removeInstanceGrimson(x);
        break;
    case 1:
        this.removeInstanceViaShephard(x);
        break;
    case 2:
        this.instances.remove(x);
        return;
    default:
        System.err.println("Invalid removeInstance strategy");
    }
    recompute();
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Set pre-computed information fields from the stored instances
 * @return the cluster radius multiplied by its entropy
 */
public final double recomputeAll() {
    if (this.instances != null) {
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // Accumulate label frequencies, centroid sums, and symbol counts
                if (x == null) {
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            } // for

            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid); // temporary - start with standard gaussian, gets updated below
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = (this.symbolFrequencies[i] == null) ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if(!instances.empty)
        recompute();
    } // end if(!instances null)
    return getRadius() * getEntropy();
}
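
The variance loop above is the compensated two-pass estimator: with per-point deltas d_i = centroid - x_i, the per-dimension sample variance is (sum(d_i^2) - (sum(d_i))^2 / n) / (n - 1), where the second term corrects for the centroid not being exactly the sample mean. The same pattern recurs in batchCluster and initialize below. As a standalone sketch:

// Compensated two-pass sample variance, matching the loop in recomputeAll().
static double compensatedVariance(double[] xs, double centroid) {
    double n = xs.length, cep = 0.0, sumSq = 0.0;
    for (double x : xs) {
        double delta = centroid - x;
        cep += delta;           // compensation term; nonzero when centroid != sample mean
        sumSq += delta * delta; // raw sum of squared deltas
    }
    return (sumSq - cep * cep / n) / (n - 1);
}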

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Wrapper for parallel K-Means used to process the warm-up data set
 * @param D warm-up data set
 * @param K number of clusters
 * @param useLabels if true, seed and grow clusters within per-label pools
 * @return the initial set of K clusters
 */
protected Set<Riffle> batchCluster(List<Instance> D, int K, boolean useLabels) {
    assert K >= 2 : "Minimum number of clusters (K) is 2";
    int numAttributes = D.get(0).numAttributes();
    TreeSet<Riffle> ret = new TreeSet<>();
    TreeSet<Integer> labels = new TreeSet<>();
    TreeMap<Integer, TreeSet<Riffle>> potentialClusters = new TreeMap<>();
    // Create a pool of candidate clusters, separated into per-label pools when useLabels is true:
    for (Instance x : D) {
        int label = (useLabels) ? (int) x.classValue() : 0;
        labels.add(label);
        TreeSet<Riffle> clusterSet = potentialClusters.get(label);
        if (clusterSet == null) {
            clusterSet = new TreeSet<>();
        }
        clusterSet.add(this.createNewCluster(x));
        potentialClusters.put(label, clusterSet);
    }

    // Initialize following the K-Means++ approach:
    Riffle C = potentialClusters.firstEntry().getValue().first();
    ret.add(C);
    potentialClusters.firstEntry().getValue().remove(C);

    Iterator<Integer> labelIter = labels.iterator();
    while ((ret.size() < K) && !potentialClusters.isEmpty()) {
        if (!labelIter.hasNext()) {
            labelIter = labels.iterator();
        } // loop around as needed
        int pseudoLabel = labelIter.next();
        TreeSet<Riffle> clusterSet = potentialClusters.get(pseudoLabel);
        if (clusterSet.isEmpty()) {
            potentialClusters.remove(pseudoLabel);
            labelIter.remove();
            continue;
        }
        SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(clusterSet, C.toInstance());
        C = nearestClusters.last().getCluster();
        ret.add(C);
        clusterSet.remove(C);
    }
    potentialClusters.clear();

    // Iterate expectation/maximization steps until the validity index converges
    final int maxIterations = 100;
    final double minDelta = 0.0001;
    int iteration = 0;
    double valIdxDelta = 1.0;
    ValIdxTupleType lastScore = null;
    while ((iteration < maxIterations) && (valIdxDelta > minDelta)) {
        iteration++;
        ret.parallelStream().forEach((c) -> {
            c.cleanTallies();
            if (c.instances == null) {
                c.instances = c.getHeader();
            }
            c.instances.clear();
        });

        // Expectation Step
        boolean wasAdded;
        for (Instance x : D) {
            SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(ret, x);
            wasAdded = false;
            int xLabel = (int) x.classValue();
            int cLabel = 0;
            if (useLabels) {
                // Add to nearest cluster with same label
                for (NearestClusterTuple nct : nearestClusters) {
                    cLabel = (int) nct.getCluster().getGroundTruth();
                    if (cLabel == xLabel) {
                        nct.getCluster().addInstance(x);
                        nct.getCluster().instances.add(x);
                        wasAdded = true;
                        //break;
                    }
                }
            }
            // just add to the closest cluster
            if (!wasAdded) {
                nearestClusters.last().getCluster().instances.add(x);
            }
        }

        // Maximization Step
        for (Riffle c : ret) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[numAttributes];
            double[] clusterVariance = new double[numAttributes];
            for (Instance x : c.instances) { // Accumulate the cluster centroid from its member points
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85; // Statistical Variance
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[numAttributes];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid); // temporary - start with standard gaussian, gets updated below
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such
            //                double[] clusterCentroid = new double[numAttributes];
            //                Arrays.fill(clusterCentroid, 0);
            //                for (Instance x : c.instances) { // Pre-populate univeral cluster with data points
            //                    double[] xValues = x.toDoubleArray();
            //                    for (int i = 0; i < xValues.length; ++i) {
            //                        clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
            //                    }
            //                }
            //                c.setCenter(clusterCentroid);
        }

        ValIdxTupleType currentScore = new ValIdxTupleType(ret);
        if (lastScore != null) {
            double diff = Math.abs(lastScore.getValIdx() - currentScore.getValIdx());
            double denominator = lastScore.getValIdx();
            valIdxDelta = (denominator == 0) ? 0.0 : Math.abs(diff / denominator);
        }
        lastScore = currentScore;
    } // end while
    return ret;
}
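
The seeding loop above is a label-aware heuristic in the spirit of K-Means++: it cycles round-robin over the class labels and draws each new seed from that label's candidate pool via findMostLikelyClusters. For reference, canonical K-Means++ samples each new center with probability proportional to its squared distance from the nearest already-chosen center; a compact sketch (java.util imports are implied):

// Canonical K-Means++ seeding: d^2-weighted sampling of new centers.
static List<double[]> kMeansPlusPlusSeeds(List<double[]> points, int k, Random rnd) {
    List<double[]> centers = new ArrayList<>();
    centers.add(points.get(rnd.nextInt(points.size()))); // first seed: uniform at random
    while (centers.size() < k) {
        double[] d2 = new double[points.size()];
        double total = 0.0;
        for (int i = 0; i < points.size(); i++) {
            double best = Double.MAX_VALUE; // squared distance to the nearest chosen center
            for (double[] c : centers) {
                double dist = 0.0;
                for (int j = 0; j < c.length; j++) {
                    double diff = points.get(i)[j] - c[j];
                    dist += diff * diff;
                }
                best = Math.min(best, dist);
            }
            total += (d2[i] = best);
        }
        double r = rnd.nextDouble() * total; // pick index i with probability d2[i] / total
        int pick = 0;
        while (pick < d2.length - 1 && (r -= d2[pick]) > 0) {
            pick++;
        }
        centers.add(points.get(pick));
    }
    return centers;
}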

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Uses methodology from Kim et al. "A Novel Validity Index for Determination of the Optimal Number of Clusters"
 * @param D warm-up data set
 */
public void initialize(List<Instance> D) {
    assert (D != null && !D.isEmpty() && D.get(0) != null) : "FeS2::initialize() called with a null or empty data list!";
    knownLabels.clear();
    universalProbabilitySums = 0;
    bestProbabilitySums = 0;
    bestProbabilityCount = 0;
    // Setup the universal set/cluster. Note that this will be crucial for subspace selection (cross-entropy checks against null hypothesis)
    double[] universalCentroid = new double[D.get(0).numAttributes()];
    double[] universalVariance = new double[D.get(0).numAttributes()];
    Arrays.fill(universalCentroid, 0);
    Arrays.fill(universalVariance, 0);
    universalCluster = new Riffle(D.get(0));
    universalCluster.updateStrategyOption.setChosenIndex(this.updateStrategyOption.getChosenIndex());
    universalCluster.outlierDefinitionStrategyOption
            .setChosenIndex(this.outlierDefinitionStrategyOption.getChosenIndex());
    universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
    universalCluster.initialStandardDeviationOption.setValue(this.initialStandardDeviationOption.getValue());
    universalCluster.alphaAdjustmentWeightOption.setValue(this.learningRateAlphaOption.getValue());
    //universalCluster.setParentClusterer(this);
    if (D.size() > 1) {
        double[] ep = new double[universalCentroid.length];
        Arrays.fill(ep, 0);
        universalCluster.setCenter(universalCentroid); // temporary - start with standard gaussian, gets updated below
        universalCluster.setVariances(universalVariance); // temporary - start with standard gaussian, will update below
        universalCluster.setWeight(0);
        double N = D.size();
        for (Instance x : D) { // Pre-populate universal cluster with data points
            knownLabels.add((int) x.classValue());
            universalCluster.addInstance(x);
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                universalCentroid[i] += xValues[i];
            }
        }
        for (int i = 0; i < universalCentroid.length; ++i) {
            universalCentroid[i] /= N;
        }
        // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
        // we use the 2-Pass method for computing sample variance (per dimension)
        for (Instance x : D) {
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                double delta = universalCentroid[i] - xValues[i];
                ep[i] += delta;
                universalVariance[i] += delta * delta;
            }
        }
        for (int i = 0; i < universalVariance.length; ++i) {
            universalVariance[i] = (universalVariance[i] - ep[i] * ep[i] / N) / (N - 1);
        }
        universalCluster.setCenter(universalCentroid); // temporary - start with standard gaussian, gets updated below
        universalCluster.setVariances(universalVariance);
    }
    universalCluster.recompute(); // this updates entropies and such

    // Ok, now let's use K-Means to find the initial cluster set
    int Cmin = this.clustersPerLabelOption.getValue() * this.knownLabels.size();
    int Cmax = Cmin + 1;
    if (optimizeInitialClusterNumberOption.isSet()) {
        Cmin = this.minimumNumberOfClusterSizeOption.getValue();//Math.max(knownLabels.size(), 2);
        Cmax = Math.max(Cmin + 1, Math.min(this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
                this.maximumNumberOfClusterSizeOption.getValue()));
    }
    ArrayList<ValIdxTupleType> valIdxSet = new ArrayList<>(Cmax);
    Set<Riffle> V;
    // Create multiple hypothesis for best K choices:
    for (int c = Cmin; c < Cmax; c++) {
        V = batchCluster(D, c, true);
        ValIdxTupleType i = new ValIdxTupleType(V);
        valIdxSet.add(i);
        if (CVI == null) {
            CVI = i;
        } else {
            CVI.setVo_min(Math.min(i.getVo(), CVI.getVo_min()));
            CVI.setVo_max(Math.max(i.getVo(), CVI.getVo_max()));
            CVI.setVu_min(Math.min(i.getVu(), CVI.getVu_min()));
            CVI.setVu_max(Math.max(i.getVu(), CVI.getVu_max()));
        }
    }

    // Normalize all:
    valIdxSet.parallelStream().map((i) -> {
        i.setVo_min(CVI.getVo_min());
        return i;
    }).map((i) -> {
        i.setVo_max(CVI.getVo_max());
        return i;
    }).map((i) -> {
        i.setVu_min(CVI.getVu_min());
        return i;
    }).forEach((i) -> {
        i.setVu_max(CVI.getVu_max());
    });

    // Find the best K by finding the minimum score:
    valIdxSet.stream().filter((i) -> (i.getValIdx() < CVI.getValIdx())).forEach((i) -> {
        CVI = i;
    });

    BufferedWriter datawriter = null; // DEBUG
    BufferedWriter rawdatawriter = null; // DEBUG
    BufferedWriter clusterwriter = null; // DEBUG
    String filePrefix = "DEBUG-" + iso8601FormatString.format(new Date()); // DEBUG
    try { // DEBUG
        File warmupData = new File((filePrefix + "-first" + D.size() + ".csv")); // DEBUG
        File rawwarmupData = new File((filePrefix + "-raw" + D.size() + ".csv")); // DEBUG
        File clusterData = new File((filePrefix + "-clusters.csv")); // DEBUG
        datawriter = new BufferedWriter(new FileWriter(warmupData)); // DEBUG
        rawdatawriter = new BufferedWriter(new FileWriter(rawwarmupData)); // DEBUG
        clusterwriter = new BufferedWriter(new FileWriter(clusterData)); // DEBUG
        clusterwriter.write("id,s,w,r,e,p,y,c,v"); // DEBUG
        clusterwriter.newLine(); // DEBUG
        String csv = ""; // DEBUG
        int rowCount = 0; // DEBUG
        for (Instance x : D) { // DEBUG
            double[] dataArray = x.toDoubleArray(); // DEBUG
            for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) { // DEBUG
                csv += dataArray[dIdx] + ","; // DEBUG
            } // DEBUG
            csv += ++rowCount; // DEBUG
            rawdatawriter.write(csv); // DEBUG
            rawdatawriter.newLine(); // DEBUG
            csv = ""; // DEBUG
        } // DEBUG
        for (Double uvar : universalVariance) {
            csv += uvar + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine(); // DEBUG
        csv = "";
        for (Double umean : universalCentroid) {
            csv += umean + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine(); // DEBUG
        csv = "";
        rawdatawriter.flush();
        this.clusters.clear();
        for (Riffle c : CVI.getClustering()) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[universalCentroid.length];
            double[] clusterVariance = new double[universalVariance.length];
            for (Instance x : c.instances) { // Accumulate the cluster centroid from its member points
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85; // Statistical Variance
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[universalCentroid.length];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid); // temporary - start with standard gaussian, gets updated below
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such

            // WRITE DEBUG DATA

            for (Instance x : c.instances) {
                double[] dataArray = x.toDoubleArray();
                for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) {
                    csv += dataArray[dIdx] + ",";
                }
                csv += c.getId();
                datawriter.write(csv);
                datawriter.newLine();
                csv = "";
            }

            //              clusterwriter.write("id,w,r,e,p,y,c,v");
            if (Double.isNaN(c.getRadius())) {
                System.out.print("Bad radius");
            }
            clusterwriter.write(c.getId() + "," + c.size() + "," + c.getWeight() + "," + c.getRadius() + ","
                    + c.getEntropy() + "," + c.getTruePurity() + "," + weka.core.Utils.maxIndex(c.getVotes())
                    + ",Centroid:," + weka.core.Utils.arrayToString(c.getCenter()) + ",Var:,"
                    + weka.core.Utils.arrayToString(c.getVariances()));
            clusterwriter.newLine();
            // END DEBUG DATA

            this.clusters.add(c);
        }
        if (this.outlierDefinitionStrategyOption.getChosenIndex() == 1) {
            this.setupPerceptron();
            double outlierPerceptronTrainingError = this.trainPerceptron();
            System.out
                    .println("outlier detection Perceptron training error = " + outlierPerceptronTrainingError);
        }
        this.clusters.stream().forEach((c) -> {
            c.instances.clear();
        });
        this.newClusterCreateCalls = 0;
        System.out.println(
                "Starting with " + this.clusters.size() + " clusters and " + this.knownLabels + " labels.");

        clusterwriter.flush(); // DEBUG
        clusterwriter.close(); // DEBUG
        datawriter.flush(); // DEBUG
        datawriter.close(); // DEBUG
        rawdatawriter.flush(); // DEBUG
        rawdatawriter.close(); // DEBUG
    } catch (IOException e) {
        e.printStackTrace(); // debug file I/O failures should not be swallowed silently
    } // DEBUG
}