Example usage for weka.core Instance toDoubleArray

List of usage examples for weka.core Instance toDoubleArray

Introduction

On this page you can find usage examples for the weka.core Instance method toDoubleArray().

Prototype

public double[] toDoubleArray();

Document

Returns the values of each attribute as an array of doubles.
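
Before the per-project examples, here is a minimal, self-contained sketch of the call; the class, dataset, and attribute names are invented for the demo. In the standard WEKA implementation the returned array is a copy of the instance's internal values, with one double per attribute in attribute order (nominal values are returned as their index in the attribute's value list).

import java.util.ArrayList;
import java.util.Arrays;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class ToDoubleArrayDemo {
    public static void main(String[] args) {
        // Two numeric attributes; names are made up for this demo.
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x1"));
        attrs.add(new Attribute("x2"));
        Instances data = new Instances("demo", attrs, 1);

        Instance inst = new DenseInstance(1.0, new double[] { 3.5, -1.2 });
        inst.setDataset(data);

        // One double per attribute, in attribute order.
        double[] values = inst.toDoubleArray();
        System.out.println(Arrays.toString(values)); // prints [3.5, -1.2]
    }
}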

Usage

From source file: moa.cluster.Riffle.java

License: Apache License

/**
 * Inverse process of adding an instance. Note that the caller takes care of the
 * recompute() call; we just apply the specific changes here.
 *
 * @param x instance to remove
 */
final protected void removeInstanceViaShephard(Instance x) {
    // multi-dimensional extension to Data Analysis 4th Ed Ch. 2 (Shepherd)
    if (this.numTotalPoints > 0) {
        double runningDeviation = this.getCenterDistance(x);
        double newPoint[] = x.toDoubleArray();
        for (int i = 0; i < centroid.length; ++i) {
            if (this.symbolFrequencies[i] == null) {
                double attributeDist = newPoint[i] - centroid[i];
                centroid[i] = centroid[i] - attributeDist / this.numTotalPoints;
            } else {
                int newVal = (int) newPoint[i];
                if (newVal < this.symbolFrequencies[i].length) {
                    this.symbolFrequencies[i][newVal]--;
                }
                centroid[i] = weka.core.Utils.maxIndex(symbolFrequencies[i]);
            }
        }
        this.setCenter(centroid);
        this.runningSumOfSquares -= runningDeviation * this.getCenterDistance(x);
    }
}
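
For numeric attributes, the loop above reverses the usual running-mean update. A minimal sketch of the per-dimension downdate, under the assumption that n is the number of points remaining after x has been removed (the caller maintains the count, per the javadoc):

// Removing x from a mean u over m points gives
//   u' = (m*u - x) / (m - 1) = u - (x - u) / (m - 1).
// Here n is assumed to equal m - 1 (the post-removal count).
static double removeFromMean(double mean, double x, int n) {
    return mean - (x - mean) / n;
}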

From source file: moa.cluster.Riffle.java

License: Apache License

/**
 * @param x instance to measure
 * @return distance from this cluster's center to x
 */
@Override
final public double getCenterDistance(Instance x) {
    if (this.distanceStrategyOption.getChosenIndex() == 13) {
        return 1.0 - this.getInclusionProbability(x);
    } else {
        double[] src = x.toDoubleArray();
        return VectorDistances.distance(src, centroid, this.instances,
                this.distanceStrategyOption.getChosenIndex());
    }
}

From source file: moa.cluster.Riffle.java

License: Apache License

/**
 * Set pre-computed information fields.
 * @return cluster radius times entropy
 */
public final double recomputeAll() {
    if (this.instances != null) {
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // accumulate label counts, centroid, and symbol frequencies per point
                if (x == null) {
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            } // for

            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid); // temporary - start with standard gaussian, gets updated below
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = (this.symbolFrequencies[i] == null) ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if(!instances.empty)
        recompute();
    } // end if(!instances null)
    return getRadius() * getEntropy();
}
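
The variance pass above is the corrected two-pass algorithm: for each dimension it accumulates both the deviations from the centroid and their squares, and the (sum of deviations)²/n term compensates when the centroid is not the exact sample mean. A standalone single-dimension sketch of the same computation:

// Corrected two-pass sample variance for one dimension; `mean` may be
// approximate, and the sumDev correction term absorbs the resulting bias.
static double twoPassVariance(double[] xs, double mean) {
    double sumDev = 0.0, sumSq = 0.0;
    for (double x : xs) {
        double delta = x - mean;
        sumDev += delta;
        sumSq += delta * delta;
    }
    double n = xs.length;
    return (sumSq - sumDev * sumDev / n) / (n - 1);
}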

From source file: moa.cluster.Riffle.java

License: Apache License

/**
 * Sanity check and initialization of dynamic fields
 *
 * @param x exemplar instance used to size and initialize the dynamic fields
 */
protected final void safeInit(Instance x) {
    if (this.embeddedLearnerOption.getValueAsCLIString().contains("Majority class")) {
        this.excludeOutlierVoting = true;
    }
    if (centroid == null) {
        centroid = x.toDoubleArray();
    }
    if (this.instances == null) {
        prepareEmbeddedClassifier();
        ArrayList<Attribute> attribs = new ArrayList<>();
        this.symbolFrequencies = new double[x.dataset().numAttributes()][];
        for (int i = 0; i < x.dataset().numAttributes(); ++i) {
            Attribute a = (Attribute) x.dataset().attribute(i).copy();
            if (i == x.classIndex()) {
                a.setWeight(0.0);
            } else {
                a.setWeight(1.0);
            }
            switch (a.type()) {
            case Attribute.STRING:
            case Attribute.NOMINAL:
                //UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = new double[a.numValues()];
                break;
            case Attribute.NUMERIC:
            case Attribute.RELATIONAL:
            case Attribute.DATE:
            default:
                // UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = null;
            }
            attribs.add(a);
        }
        this.instances = new Instances("ClusterData", attribs, 1);
        this.instances.setClassIndex(x.classIndex());

    }
    //        else {
    //            for (int i = 0; i < x.dataset().numAttributes() && i < this.header.numAttributes(); ++i) {
    //                double val = x.value(i);
    //                Attribute a = this.header.attribute(i);
    //                // expand range as necessary
    //                if (val < a.getLowerNumericBound() || val > a.getUpperNumericBound()){
    //                    UnsafeUtils.setAttributeRange(a, Math.min(val,a.getLowerNumericBound()), Math.max(val,a.getUpperNumericBound()));
    //                }
    //                // increase frequency counts if new string value is encountered
    //                if (a.type() == Attribute.STRING && (val >= Math.max(this.symbolFrequencies[i].length, a.numValues()))) {
    //                    double newArray[] = new double[Math.max(this.symbolFrequencies[i].length, a.numValues())];
    //                    Arrays.fill(newArray, 0);
    //                    for(int j = 0; j <= this.symbolFrequencies[i].length; j++) {
    //                        newArray[j] = this.symbolFrequencies[i][j];
    //                    }
    //                    this.symbolFrequencies[i] = newArray;
    //                }
    //            }
    //        }
    if (this.variances == null) {
        this.variances = new double[x.numAttributes()];
        Arrays.fill(this.variances, 1);
    }
    if (this.entropies == null) {
        this.entropies = new double[x.numAttributes()];
        Arrays.fill(this.entropies, 0);
    }
    if (this.labelFrequencies == null) {
        this.labelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.labelFrequencies, 0);
    }
    if (this.gtLabelFrequencies == null) {
        this.gtLabelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.gtLabelFrequencies, 0);
    }
    if (this.rho == null) {
        this.rho = new double[x.numAttributes()];
        Arrays.fill(this.rho, 0);
    }
}

From source file: moa.cluster.SphereCluster.java

License: Apache License

public SphereCluster(List<? extends Instance> instances, int dimension) {
    this();
    if (instances == null || instances.size() <= 0)
        return;

    weight = instances.size();

    Miniball mb = new Miniball(dimension);
    mb.clear();

    for (Instance instance : instances) {
        mb.check_in(instance.toDoubleArray());
    }

    mb.build();
    center = mb.center();
    radius = mb.radius();
    mb.clear();
}
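
The Miniball helper computes the minimum enclosing ball of the registered points. A hypothetical standalone use of the same calls seen in the constructor (exact semantics depend on the MOA Miniball implementation):

Miniball mb = new Miniball(2);              // 2-dimensional points
mb.check_in(new double[] { 0.0, 0.0 });     // register each point
mb.check_in(new double[] { 2.0, 0.0 });
mb.build();                                 // solve for the enclosing ball
double[] c = mb.center();                   // expected near {1.0, 0.0}
double r = mb.radius();                     // expected near 1.0
mb.clear();                                 // reset for reuse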

From source file: moa.cluster.SphereCluster.java

License: Apache License

public double[] getDistanceVector(Instance instance) {
    return distanceVector(getCenter(), instance.toDoubleArray());
}

From source file: moa.clusterer.FeS2.java

License: Apache License

/**
 * Wrapper for parallel K-Means for processing the warm-up data set
 * @param D Warm-up data set
 * @param K number of clusters
 * @param useLabels if true, use class labels to partition the seed pool
 * @return the resulting cluster set
 */
protected Set<Riffle> batchCluster(List<Instance> D, int K, boolean useLabels) {
    assert K >= 2 : "Minimum number of clusters (K) is 2";
    int numAttributes = D.get(0).numAttributes();
    TreeSet<Riffle> ret = new TreeSet<>();
    TreeSet<Integer> labels = new TreeSet<>();
    TreeMap<Integer, TreeSet<Riffle>> potentialClusters = new TreeMap<>();
    //Create a potential cluster pool. Separate into per-label pools if useLabels is set to true:
    for (Instance x : D) {
        int label = (useLabels) ? (int) x.classValue() : 0;
        labels.add(label);
        TreeSet<Riffle> clusterSet = potentialClusters.get(label);
        if (clusterSet == null) {
            clusterSet = new TreeSet<>();
        }
        clusterSet.add(this.createNewCluster(x));
        potentialClusters.put(label, clusterSet);
    }

    // Initialize following the K-Means++ approach:
    Riffle C = potentialClusters.firstEntry().getValue().first();
    ret.add(C);
    potentialClusters.firstEntry().getValue().remove(C);

    Iterator<Integer> labelIter = labels.iterator();
    while ((ret.size() < K) && !potentialClusters.isEmpty()) {
        if (!labelIter.hasNext()) {
            labelIter = labels.iterator();
        } // loop around as needed
        int pseudoLabel = labelIter.next();
        TreeSet<Riffle> clusterSet = potentialClusters.get(pseudoLabel);
        if (clusterSet.isEmpty()) {
            potentialClusters.remove(pseudoLabel);
            labelIter.remove();
            continue;
        }
        SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(clusterSet, C.toInstance());
        C = nearestClusters.last().getCluster();
        ret.add(C);
        clusterSet.remove(C);
    }
    potentialClusters.clear();

    // Iterate 
    final int maxIterations = 100;
    final double minDelta = 0.0001;
    int iteration = 0;
    double valIdxDelta = 1.0;
    ValIdxTupleType lastScore = null;
    while ((iteration < maxIterations) && (valIdxDelta > minDelta)) {
        iteration++;
        ret.parallelStream().forEach((c) -> {
            c.cleanTallies();
            if (c.instances == null) {
                c.instances = c.getHeader();
            }
            c.instances.clear();
        });

        // Expectation Step
        boolean wasAdded;
        for (Instance x : D) {
            SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(ret, x);
            wasAdded = false;
            int xLabel = (int) x.classValue();
            int cLabel = 0;
            if (useLabels) {
                // Add to nearest cluster with same label
                for (NearestClusterTuple nct : nearestClusters) {
                    cLabel = (int) nct.getCluster().getGroundTruth();
                    if (cLabel == xLabel) {
                        nct.getCluster().addInstance(x);
                        nct.getCluster().instances.add(x);
                        wasAdded = true;
                        //break;
                    }
                }
            }
            // just add to the closest cluster
            if (!wasAdded) {
                nearestClusters.last().getCluster().instances.add(x);
            }
        }

        // Maximization Step
        for (Riffle c : ret) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[numAttributes];
            double[] clusterVariance = new double[numAttributes];
            for (Instance x : c.instances) { // accumulate the cluster centroid from its assigned points
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85; // Statistical Variance
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[numAttributes];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid); // temporary - start with standard gaussian, gets updated below
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such
            //                double[] clusterCentroid = new double[numAttributes];
            //                Arrays.fill(clusterCentroid, 0);
            //                for (Instance x : c.instances) { // Pre-populate univeral cluster with data points
            //                    double[] xValues = x.toDoubleArray();
            //                    for (int i = 0; i < xValues.length; ++i) {
            //                        clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
            //                    }
            //                }
            //                c.setCenter(clusterCentroid);
        }

        ValIdxTupleType currentScore = new ValIdxTupleType(ret);
        if (lastScore != null) {
            double diff = Math.abs(lastScore.getValIdx() - currentScore.getValIdx());
            double denominator = lastScore.getValIdx();
            valIdxDelta = (denominator == 0) ? 0.0 : Math.abs(diff / denominator);
        }
        lastScore = currentScore;
    } // end while
    return ret;
}
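
The seeding phase above is a deterministic, label-aware variant of K-Means++: instead of sampling the next seed with probability proportional to squared distance, it takes the candidate farthest from the previously chosen center while cycling through labels. For comparison, a minimal sketch of the textbook D² sampling over plain double[] points (class and helper names are invented):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

final class KppSeeding {
    // Classic K-Means++: each new seed is drawn with probability
    // proportional to its squared distance from the nearest existing seed.
    static List<double[]> kppSeeds(List<double[]> points, int k, Random rnd) {
        List<double[]> seeds = new ArrayList<>();
        seeds.add(points.get(rnd.nextInt(points.size())));
        while (seeds.size() < k) {
            double[] d2 = new double[points.size()];
            double total = 0.0;
            for (int i = 0; i < points.size(); i++) {
                d2[i] = nearestSq(points.get(i), seeds);
                total += d2[i];
            }
            double target = rnd.nextDouble() * total;
            int pick = 0;
            for (double acc = d2[0]; acc < target && pick < d2.length - 1; ) {
                acc += d2[++pick];
            }
            seeds.add(points.get(pick));
        }
        return seeds;
    }

    private static double nearestSq(double[] p, List<double[]> seeds) {
        double best = Double.MAX_VALUE;
        for (double[] s : seeds) {
            double d = 0.0;
            for (int j = 0; j < p.length; j++) {
                double diff = p[j] - s[j];
                d += diff * diff;
            }
            best = Math.min(best, d);
        }
        return best;
    }
}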

From source file: moa.clusterer.FeS2.java

License: Apache License

/**
 * Uses methodology from Kim et al. "A Novel Validity Index for Determination of the Optimal Number of Clusters"
 * @param D Warm-up data set
 */
public void initialize(List<Instance> D) {
    assert (D != null && !D.isEmpty() && D.get(0) != null) : "FeS::initialize() called with a null data list!";
    knownLabels.clear();
    universalProbabilitySums = 0;
    bestProbabilitySums = 0;
    bestProbabilityCount = 0;
    // Setup the universal set/cluster. Note that this will be crucial for subspace selection (cross-entropy checks against null hypothesis)
    double[] universalCentroid = new double[D.get(0).numAttributes()];
    double[] universalVariance = new double[D.get(0).numAttributes()];
    Arrays.fill(universalCentroid, 0);
    Arrays.fill(universalVariance, 0);
    universalCluster = new Riffle(D.get(0));
    universalCluster.updateStrategyOption.setChosenIndex(this.updateStrategyOption.getChosenIndex());
    universalCluster.outlierDefinitionStrategyOption
            .setChosenIndex(this.outlierDefinitionStrategyOption.getChosenIndex());
    universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
    universalCluster.initialStandardDeviationOption.setValue(this.initialStandardDeviationOption.getValue());
    universalCluster.alphaAdjustmentWeightOption.setValue(this.learningRateAlphaOption.getValue());
    //universalCluster.setParentClusterer(this);
    if (D.size() > 1) {
        double[] ep = new double[universalCentroid.length];
        Arrays.fill(ep, 0);
        universalCluster.setCenter(universalCentroid); // temporary - start with standard gaussian, gets updated below
        universalCluster.setVariances(universalVariance); // temporary - start with standard gaussian, will update below
        universalCluster.setWeight(0);
        double N = D.size();
        for (Instance x : D) { // Pre-populate universal cluster with data points
            knownLabels.add((int) x.classValue());
            universalCluster.addInstance(x);
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                universalCentroid[i] += xValues[i];
            }
        }
        for (int i = 0; i < universalCentroid.length; ++i) {
            universalCentroid[i] /= N;
        }
        // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
        // we use the 2-Pass method for computing sample variance (per dimension)
        for (Instance x : D) {
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                double delta = universalCentroid[i] - xValues[i];
                ep[i] += delta;
                universalVariance[i] += delta * delta;
            }
        }
        for (int i = 0; i < universalVariance.length; ++i) {
            universalVariance[i] = (universalVariance[i] - ep[i] * ep[i] / N) / (N - 1);
        }
        universalCluster.setCenter(universalCentroid); // temporary - start with standard gaussian, gets updated below
        universalCluster.setVariances(universalVariance);
    }
    universalCluster.recompute(); // this updates entropies and such

    // Ok, now let's use K-Means to find the initial cluster set
    int Cmin = this.clustersPerLabelOption.getValue() * this.knownLabels.size();
    int Cmax = Cmin + 1;
    if (optimizeInitialClusterNumberOption.isSet()) {
        Cmin = this.minimumNumberOfClusterSizeOption.getValue();//Math.max(knownLabels.size(), 2);
        Cmax = Math.max(Cmin + 1, Math.min(this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
                this.maximumNumberOfClusterSizeOption.getValue()));
    }
    ArrayList<ValIdxTupleType> valIdxSet = new ArrayList<>(Cmax);
    Set<Riffle> V;
    // Create multiple hypothesis for best K choices:
    for (int c = Cmin; c < Cmax; c++) {
        V = batchCluster(D, c, true);
        ValIdxTupleType i = new ValIdxTupleType(V);
        valIdxSet.add(i);
        if (CVI == null) {
            CVI = i;
        } else {
            CVI.setVo_min(Math.min(i.getVo(), CVI.getVo_min()));
            CVI.setVo_max(Math.max(i.getVo(), CVI.getVo_max()));
            CVI.setVu_min(Math.min(i.getVu(), CVI.getVu_min()));
            CVI.setVu_max(Math.max(i.getVu(), CVI.getVu_max()));
        }
    }

    // Normalize all:
    valIdxSet.parallelStream().map((i) -> {
        i.setVo_min(CVI.getVo_min());
        return i;
    }).map((i) -> {
        i.setVo_max(CVI.getVo_max());
        return i;
    }).map((i) -> {
        i.setVu_min(CVI.getVu_min());
        return i;
    }).forEach((i) -> {
        i.setVu_max(CVI.getVu_max());
    });

    // Find the best K by finding the minimum score:
    valIdxSet.stream().filter((i) -> (i.getValIdx() < CVI.getValIdx())).forEach((i) -> {
        CVI = i;
    });

    BufferedWriter datawriter = null; // DEBUG
    BufferedWriter rawdatawriter = null; // DEBUG
    BufferedWriter clusterwriter = null; // DEBUG
    String filePrefix = "DEBUG-" + iso8601FormatString.format(new Date()); // DEBUG
    try { // DEBUG
        File warmupData = new File((filePrefix + "-first" + D.size() + ".csv")); // DEBUG
        File rawwarmupData = new File((filePrefix + "-raw" + D.size() + ".csv")); // DEBUG
        File clusterData = new File((filePrefix + "-clusters.csv")); // DEBUG
        datawriter = new BufferedWriter(new FileWriter(warmupData)); // DEBUG
        rawdatawriter = new BufferedWriter(new FileWriter(rawwarmupData)); // DEBUG
        clusterwriter = new BufferedWriter(new FileWriter(clusterData)); // DEBUG
        clusterwriter.write("id,s,w,r,e,p,y,c,v"); // DEBUG
        clusterwriter.newLine(); // DEBUG
        String csv = ""; // DEBUG
        int rowCount = 0; // DEBUG
        for (Instance x : D) { // DEBUG
            double[] dataArray = x.toDoubleArray(); // DEBUG
            for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) { // DEBUG
                csv += dataArray[dIdx] + ","; // DEBUG
            } // DEBUG
            csv += ++rowCount; // DEBUG
            rawdatawriter.write(csv); // DEBUG
            rawdatawriter.newLine(); // DEBUG
            csv = ""; // DEBUG
        } // DEBUG
        for (Double uvar : universalVariance) {
            csv += uvar + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine(); // DEBUG
        csv = "";
        for (Double umean : universalCentroid) {
            csv += umean + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine(); // DEBUG
        csv = "";
        rawdatawriter.flush();
        this.clusters.clear();
        for (Riffle c : CVI.getClustering()) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[universalCentroid.length];
            double[] clusterVariance = new double[universalVariance.length];
            for (Instance x : c.instances) { // accumulate the cluster centroid from its assigned points
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85; // Statistical Variance
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[universalCentroid.length];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid); // temporary - start with standard gaussian, gets updated below
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such

            // WRITE DEBUG DATA

            for (Instance x : c.instances) {
                double[] dataArray = x.toDoubleArray();
                for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) {
                    csv += dataArray[dIdx] + ",";
                }
                csv += c.getId();
                datawriter.write(csv);
                datawriter.newLine();
                csv = "";
            }

            //              clusterwriter.write("id,w,r,e,p,y,c,v");
            if (Double.isNaN(c.getRadius())) {
                System.out.print("Bad radius");
            }
            clusterwriter.write(c.getId() + "," + c.size() + "," + c.getWeight() + "," + c.getRadius() + ","
                    + c.getEntropy() + "," + c.getTruePurity() + "," + weka.core.Utils.maxIndex(c.getVotes())
                    + ",Centroid:," + weka.core.Utils.arrayToString(c.getCenter()) + ",Var:,"
                    + weka.core.Utils.arrayToString(c.getVariances()));
            clusterwriter.newLine();
            // END DEBUG DATA

            this.clusters.add(c);
        }
        if (this.outlierDefinitionStrategyOption.getChosenIndex() == 1) {
            this.setupPerceptron();
            double outlierPerceptronTrainingError = this.trainPerceptron();
            System.out
                    .println("outlier detection Perceptron training error = " + outlierPerceptronTrainingError);
        }
        this.clusters.stream().forEach((c) -> {
            c.instances.clear();
        });
        this.newClusterCreateCalls = 0;
        System.out.println(
                "Starting with " + this.clusters.size() + " clusters and " + this.knownLabels + " labels.");

        clusterwriter.flush(); // DEBUG
        clusterwriter.close(); // DEBUG
        datawriter.flush(); // DEBUG
        datawriter.close(); // DEBUG
        rawdatawriter.flush(); // DEBUG
        rawdatawriter.close(); // DEBUG
    } catch (IOException e) {
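        // DEBUG output is best-effort; I/O failures are deliberately ignored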
    } // DEBUG
}

From source file: moa.clusterer.outliers.Sieve.java

License: Apache License

/**
 * Use inclusion probability to discover the cluster "nearest" the provided instance
 *
 * @param x instance in question
 * @param C set of clusters
 * @return clusters paired with their distances to x, sorted
 */
protected final NearestClusterTuple[] findMostLikelyClusters(Collection<Riffle> C, Instance x) {
    NearestClusterTuple[] ret = new NearestClusterTuple[C.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    double dist = 0;
    for (Riffle c : C) {
        dist = c.getCenterDistance(xVals);
        ret[idx++] = new NearestClusterTuple(c, dist);
    } // end for
    Arrays.parallelSort(ret);
    return ret;
}
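
Arrays.parallelSort requires the tuple type to implement Comparable. A hypothetical shape consistent with how NearestClusterTuple is used in these listings (the actual MOA class may differ):

// Invented reconstruction for illustration: constructed from a cluster and
// its distance, ordered by distance via Comparable.
final class NearestClusterTuple implements Comparable<NearestClusterTuple> {
    private final Riffle cluster;
    private final double distance;

    NearestClusterTuple(Riffle cluster, double distance) {
        this.cluster = cluster;
        this.distance = distance;
    }

    Riffle getCluster() { return cluster; }

    @Override
    public int compareTo(NearestClusterTuple o) {
        return Double.compare(this.distance, o.distance);
    }
}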

From source file: moa.clusterer.outliers.Sieve.java

License: Apache License

/**
 * Find the instances in D nearest the provided instance
 *
 * @param D instance set to search
 * @param x instance in question
 * @return instances paired with their distances to x, sorted
 */
protected final NearestInstanceTuple[] findNearestNeighbors(Instances D, Instance x) {
    NearestInstanceTuple[] ret = new NearestInstanceTuple[D.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    for (Instance n : D) {
        ret[idx++] = new NearestInstanceTuple(n, VectorDistances.distance(xVals, n.toDoubleArray(), D,
                this.distanceStrategyOption.getChosenIndex()));
    } // end for
    Arrays.parallelSort(ret);
    return ret;
}