List of usage examples for weka.core.Instance.toDoubleArray()
public double[] toDoubleArray();
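Quick illustration before the sources below: toDoubleArray() flattens an instance into a double[] with one slot per attribute, and nominal (or string) values come back as the index of the value, not the label. A minimal, self-contained sketch (the dataset and attribute names are illustrative, not taken from the sources below):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class ToDoubleArrayDemo {
    public static void main(String[] args) {
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x1")); // numeric
        attrs.add(new Attribute("x2")); // numeric
        ArrayList<String> labels = new ArrayList<>();
        labels.add("a");
        labels.add("b");
        attrs.add(new Attribute("class", labels)); // nominal
        Instances data = new Instances("demo", attrs, 1);
        data.setClassIndex(2);

        Instance inst = new DenseInstance(3);
        inst.setDataset(data); // needed so nominal values resolve to indices
        inst.setValue(0, 1.5);
        inst.setValue(1, -2.0);
        inst.setValue(2, "b");

        double[] vals = inst.toDoubleArray(); // {1.5, -2.0, 1.0} -- "b" maps to index 1
        System.out.println(java.util.Arrays.toString(vals));
    }
}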
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Inverse process of adding an instance. Note that the caller takes care of the
 * recompute() call; we only apply the specific incremental updates here.
 *
 * @param x instance to remove
 */
final protected void removeInstanceViaShephard(Instance x) {
    // multi-dimensional extension to Data Analysis 4th Ed Ch. 2 (Shepherd)
    if (this.numTotalPoints > 0) {
        double runningDeviation = this.getCenterDistance(x);
        double newPoint[] = x.toDoubleArray();
        for (int i = 0; i < centroid.length; ++i) {
            if (this.symbolFrequencies[i] == null) {
                // numeric attribute: reverse the incremental mean update
                double attributeDist = newPoint[i] - centroid[i];
                centroid[i] = centroid[i] - attributeDist / this.numTotalPoints;
            } else {
                // nominal attribute: decrement the symbol count and re-take the mode
                int newVal = (int) newPoint[i];
                if (newVal < this.symbolFrequencies[i].length) {
                    this.symbolFrequencies[i][newVal]--;
                }
                centroid[i] = weka.core.Utils.maxIndex(symbolFrequencies[i]);
            }
        }
        this.setCenter(centroid);
        this.runningSumOfSquares -= runningDeviation * this.getCenterDistance(x);
    }
}
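For reference, the exact inverse of the incremental-mean update removes x from a mean c over n points via c' = c - (x - c) / (n - 1); the snippet above divides by the running total n instead, a close approximation for large n. A standalone sketch on plain arrays (removeFromMean is a hypothetical helper, not part of Riffle):

static void removeFromMean(double[] centroid, double[] x, int n) {
    // n is the point count *before* removal; afterwards the centroid is
    // the exact mean of the remaining n - 1 points.
    for (int i = 0; i < centroid.length; i++) {
        centroid[i] -= (x[i] - centroid[i]) / (n - 1);
    }
}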
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * @param x instance to measure against this cluster
 * @return distance from x to the cluster centroid under the configured metric
 */
@Override
final public double getCenterDistance(Instance x) {
    if (this.distanceStrategyOption.getChosenIndex() == 13) {
        // strategy 13 treats distance as the complement of inclusion probability
        return 1.0 - this.getInclusionProbability(x);
    } else {
        double[] src = x.toDoubleArray();
        return VectorDistances.distance(src, centroid, this.instances,
                this.distanceStrategyOption.getChosenIndex());
    }
}
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Set pre-computed information fields.
 *
 * @return radius times entropy of the recomputed cluster
 */
public final double recomputeAll() {
    if (this.instances != null) {
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // pre-populate universal cluster with data points
                if (x == null) {
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            } // for
            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid); // temporary - start with standard gaussian, gets updated below
            // The cluster class uses an incremental heuristic, but we want to start out as pure
            // as possible, so we use the 2-Pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = (this.symbolFrequencies[i] == null)
                                ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // statistical variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if (!instances.isEmpty())
        recompute();
    } // end if (instances != null)
    return getRadius() * getEntropy();
}
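The variance step above is the compensated two-pass formula: var = (sum(delta^2) - (sum(delta))^2 / n) / (n - 1), where the correction term cancels the rounding error that accumulates when the centroid is only an estimate of the true mean (for an exact mean, sum(delta) is zero and the term vanishes). A standalone per-dimension sketch (sampleVariance is a hypothetical helper):

static double sampleVariance(double[] values, double mean) {
    double n = values.length;
    double sumDelta = 0.0; // compensation term: sums to ~0 if mean is exact
    double sumSq = 0.0;    // sum of squared deviations
    for (double v : values) {
        double d = mean - v;
        sumDelta += d;
        sumSq += d * d;
    }
    return (sumSq - sumDelta * sumDelta / n) / (n - 1);
}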
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Sanity check and initialization of dynamic fields.
 *
 * @param x
 */
protected final void safeInit(Instance x) {
    if (this.embeddedLearnerOption.getValueAsCLIString().contains("Majority class")) {
        this.excludeOutlierVoting = true;
    }
    if (centroid == null) {
        centroid = x.toDoubleArray();
    }
    if (this.instances == null) {
        prepareEmbeddedClassifier();
        ArrayList<Attribute> attribs = new ArrayList<>();
        this.symbolFrequencies = new double[x.dataset().numAttributes()][];
        for (int i = 0; i < x.dataset().numAttributes(); ++i) {
            Attribute a = (Attribute) x.dataset().attribute(i).copy();
            if (i == x.classIndex()) {
                a.setWeight(0.0);
            } else {
                a.setWeight(1.0);
            }
            switch (a.type()) {
            case Attribute.STRING:
            case Attribute.NOMINAL:
                //UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = new double[a.numValues()];
                break;
            case Attribute.NUMERIC:
            case Attribute.RELATIONAL:
            case Attribute.DATE:
            default:
                //UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = null;
            }
            attribs.add(a);
        }
        this.instances = new Instances("ClusterData", attribs, 1);
        this.instances.setClassIndex(x.classIndex());
    }
    // else {
    //     for (int i = 0; i < x.dataset().numAttributes() && i < this.header.numAttributes(); ++i) {
    //         double val = x.value(i);
    //         Attribute a = this.header.attribute(i);
    //         // expand range as necessary
    //         if (val < a.getLowerNumericBound() || val > a.getUpperNumericBound()) {
    //             UnsafeUtils.setAttributeRange(a, Math.min(val, a.getLowerNumericBound()),
    //                     Math.max(val, a.getUpperNumericBound()));
    //         }
    //         // increase frequency counts if new string value is encountered
    //         if (a.type() == Attribute.STRING
    //                 && (val >= Math.max(this.symbolFrequencies[i].length, a.numValues()))) {
    //             double newArray[] = new double[Math.max(this.symbolFrequencies[i].length, a.numValues())];
    //             Arrays.fill(newArray, 0);
    //             for (int j = 0; j <= this.symbolFrequencies[i].length; j++) {
    //                 newArray[j] = this.symbolFrequencies[i][j];
    //             }
    //             this.symbolFrequencies[i] = newArray;
    //         }
    //     }
    // }
    if (this.variances == null) {
        this.variances = new double[x.numAttributes()];
        Arrays.fill(this.variances, 1);
    }
    if (this.entropies == null) {
        this.entropies = new double[x.numAttributes()];
        Arrays.fill(this.entropies, 0);
    }
    if (this.labelFrequencies == null) {
        this.labelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.labelFrequencies, 0);
    }
    if (this.gtLabelFrequencies == null) {
        this.gtLabelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.gtLabelFrequencies, 0);
    }
    if (this.rho == null) {
        this.rho = new double[x.numAttributes()];
        Arrays.fill(this.rho, 0);
    }
}
From source file:moa.cluster.SphereCluster.java
License:Apache License
public SphereCluster(List<? extends Instance> instances, int dimension) {
    this();
    if (instances == null || instances.size() <= 0)
        return;
    weight = instances.size();
    Miniball mb = new Miniball(dimension);
    mb.clear();
    for (Instance instance : instances) {
        mb.check_in(instance.toDoubleArray());
    }
    mb.build();
    center = mb.center();
    radius = mb.radius();
    mb.clear();
}
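The constructor wraps a minimal-enclosing-ball solver: each instance is flattened with toDoubleArray() and registered with Miniball, which yields the tightest sphere covering all points. A usage sketch reusing only the Miniball calls shown above (the data variable is illustrative):

// Smallest enclosing ball of a batch of instances.
Miniball mb = new Miniball(data.numAttributes());
mb.clear();
for (Instance inst : data) {
    mb.check_in(inst.toDoubleArray()); // register each point as a double[]
}
mb.build();                    // solve for the minimal enclosing ball
double[] center = mb.center(); // ball midpoint
double radius = mb.radius();   // covering radius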
From source file:moa.cluster.SphereCluster.java
License:Apache License
public double[] getDistanceVector(Instance instance) {
    return distanceVector(getCenter(), instance.toDoubleArray());
}
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * Wrapper for parallel K-Means for processing the warm-up data set.
 *
 * @param D Warm-up data set
 * @param K number of clusters
 * @param useLabels if true, seed and assign clusters per class label
 * @return the resulting cluster set
 */
protected Set<Riffle> batchCluster(List<Instance> D, int K, boolean useLabels) {
    assert K >= 2 : "Minimum number of clusters (K) is 2";
    int numAttributes = D.get(0).numAttributes();
    TreeSet<Riffle> ret = new TreeSet<>();
    TreeSet<Integer> labels = new TreeSet<>();
    TreeMap<Integer, TreeSet<Riffle>> potentialClusters = new TreeMap<>();
    // Create a potential cluster pool. Separate into distinct pools by label if useLabels is true:
    for (Instance x : D) {
        int label = (useLabels) ? (int) x.classValue() : 0;
        labels.add(label);
        TreeSet<Riffle> clusterSet = potentialClusters.get(label);
        if (clusterSet == null) {
            clusterSet = new TreeSet<>();
        }
        clusterSet.add(this.createNewCluster(x));
        potentialClusters.put(label, clusterSet);
    }
    // Initialize following the K-Means++ approach:
    Riffle C = potentialClusters.firstEntry().getValue().first();
    ret.add(C);
    potentialClusters.firstEntry().getValue().remove(C);
    Iterator<Integer> labelIter = labels.iterator();
    while ((ret.size() < K) && !potentialClusters.isEmpty()) {
        if (!labelIter.hasNext()) {
            labelIter = labels.iterator(); // loop around as needed
        }
        int pseudoLabel = labelIter.next();
        TreeSet<Riffle> clusterSet = potentialClusters.get(pseudoLabel);
        if (clusterSet.isEmpty()) {
            potentialClusters.remove(pseudoLabel);
            labelIter.remove();
            continue;
        }
        SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(clusterSet, C.toInstance());
        C = nearestClusters.last().getCluster();
        ret.add(C);
        clusterSet.remove(C);
    }
    potentialClusters.clear();
    // Iterate
    final int maxIterations = 100;
    final double minDelta = 0.0001;
    int iteration = 0;
    double valIdxDelta = 1.0;
    ValIdxTupleType lastScore = null;
    while ((iteration < maxIterations) && (valIdxDelta > minDelta)) {
        iteration++;
        ret.parallelStream().forEach((c) -> {
            c.cleanTallies();
            if (c.instances == null) {
                c.instances = c.getHeader();
            }
            c.instances.clear();
        });
        // Expectation Step
        boolean wasAdded;
        for (Instance x : D) {
            SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(ret, x);
            wasAdded = false;
            int xLabel = (int) x.classValue();
            int cLabel = 0;
            if (useLabels) {
                // Add to nearest cluster with same label
                for (NearestClusterTuple nct : nearestClusters) {
                    cLabel = (int) nct.getCluster().getGroundTruth();
                    if (cLabel == xLabel) {
                        nct.getCluster().addInstance(x);
                        nct.getCluster().instances.add(x);
                        wasAdded = true;
                        //break;
                    }
                }
            }
            // just add to the closest cluster
            if (!wasAdded) {
                nearestClusters.last().getCluster().instances.add(x);
            }
        }
        // Maximization Step
        for (Riffle c : ret) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[numAttributes];
            double[] clusterVariance = new double[numAttributes];
            for (Instance x : c.instances) {
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure
            // as possible, so we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85;
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[numAttributes];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // statistical variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid);
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such
        }
        ValIdxTupleType currentScore = new ValIdxTupleType(ret);
        if (lastScore != null) {
            double diff = Math.abs(lastScore.getValIdx() - currentScore.getValIdx());
            double denominator = lastScore.getValIdx();
            valIdxDelta = (denominator == 0) ? 0.0 : Math.abs(diff / denominator);
        }
        lastScore = currentScore;
    } // end while
    return ret;
}
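The outer loop's stopping rule is worth isolating: EM passes repeat until the relative change in the validity score (new ValIdxTupleType(ret).getValIdx() above) falls below minDelta, or a hard iteration cap is hit. A minimal sketch with the scorer and EM pass abstracted as lambdas (runUntilConverged is a hypothetical helper, not part of FeS2):

static double runUntilConverged(java.util.function.DoubleSupplier scoreOf, Runnable emStep) {
    final int maxIterations = 100;
    final double minDelta = 0.0001;
    double lastScore = Double.NaN;
    for (int iter = 0; iter < maxIterations; iter++) {
        emStep.run(); // one expectation + maximization pass
        double score = scoreOf.getAsDouble();
        if (!Double.isNaN(lastScore)) {
            double delta = (lastScore == 0) ? 0.0 : Math.abs((lastScore - score) / lastScore);
            if (delta <= minDelta) {
                return score; // converged: relative change below minDelta
            }
        }
        lastScore = score;
    }
    return lastScore;
}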
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * Uses methodology from Kim et al., "A Novel Validity Index for Determination of the
 * Optimal Number of Clusters".
 *
 * @param D Warm-up data set
 */
public void initialize(List<Instance> D) {
    assert (D != null && !D.isEmpty() && D.get(0) != null) : "FeS::initialize() called with a null data list!";
    knownLabels.clear();
    universalProbabilitySums = 0;
    bestProbabilitySums = 0;
    bestProbabilityCount = 0;
    // Setup the universal set/cluster. Note that this will be crucial for subspace selection
    // (cross-entropy checks against null hypothesis)
    double[] universalCentroid = new double[D.get(0).numAttributes()];
    double[] universalVariance = new double[D.get(0).numAttributes()];
    Arrays.fill(universalCentroid, 0);
    Arrays.fill(universalVariance, 0);
    universalCluster = new Riffle(D.get(0));
    universalCluster.updateStrategyOption.setChosenIndex(this.updateStrategyOption.getChosenIndex());
    universalCluster.outlierDefinitionStrategyOption
            .setChosenIndex(this.outlierDefinitionStrategyOption.getChosenIndex());
    universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
    universalCluster.initialStandardDeviationOption.setValue(this.initialStandardDeviationOption.getValue());
    universalCluster.alphaAdjustmentWeightOption.setValue(this.learningRateAlphaOption.getValue());
    //universalCluster.setParentClusterer(this);
    if (D.size() > 1) {
        double[] ep = new double[universalCentroid.length];
        Arrays.fill(ep, 0);
        universalCluster.setCenter(universalCentroid);    // temporary - standard gaussian, updated below
        universalCluster.setVariances(universalVariance); // temporary - standard gaussian, updated below
        universalCluster.setWeight(0);
        double N = D.size();
        for (Instance x : D) { // pre-populate universal cluster with data points
            knownLabels.add((int) x.classValue());
            universalCluster.addInstance(x);
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                universalCentroid[i] += xValues[i];
            }
        }
        for (int i = 0; i < universalCentroid.length; ++i) {
            universalCentroid[i] /= N;
        }
        // The cluster class uses an incremental heuristic, but we want to start out as pure
        // as possible, so we use the 2-Pass method for computing sample variance (per dimension)
        for (Instance x : D) {
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                double delta = universalCentroid[i] - xValues[i];
                ep[i] += delta;
                universalVariance[i] += delta * delta;
            }
        }
        for (int i = 0; i < universalVariance.length; ++i) {
            universalVariance[i] = (universalVariance[i] - ep[i] * ep[i] / N) / (N - 1);
        }
        universalCluster.setCenter(universalCentroid);
        universalCluster.setVariances(universalVariance);
    }
    universalCluster.recompute(); // this updates entropies and such
    // Now use K-Means to find the initial cluster set
    int Cmin = this.clustersPerLabelOption.getValue() * this.knownLabels.size();
    int Cmax = Cmin + 1;
    if (optimizeInitialClusterNumberOption.isSet()) {
        Cmin = this.minimumNumberOfClusterSizeOption.getValue(); //Math.max(knownLabels.size(), 2);
        Cmax = Math.max(Cmin + 1, Math.min(this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
                this.maximumNumberOfClusterSizeOption.getValue()));
    }
    ArrayList<ValIdxTupleType> valIdxSet = new ArrayList<>(Cmax);
    Set<Riffle> V;
    // Create multiple hypotheses for the best K choices:
    for (int c = Cmin; c < Cmax; c++) {
        V = batchCluster(D, c, true);
        ValIdxTupleType i = new ValIdxTupleType(V);
        valIdxSet.add(i);
        if (CVI == null) {
            CVI = i;
        } else {
            CVI.setVo_min(Math.min(i.getVo(), CVI.getVo_min()));
            CVI.setVo_max(Math.max(i.getVo(), CVI.getVo_max()));
            CVI.setVu_min(Math.min(i.getVu(), CVI.getVu_min()));
            CVI.setVu_max(Math.max(i.getVu(), CVI.getVu_max()));
        }
    }
    // Normalize all:
    valIdxSet.parallelStream().map((i) -> {
        i.setVo_min(CVI.getVo_min());
        return i;
    }).map((i) -> {
        i.setVo_max(CVI.getVo_max());
        return i;
    }).map((i) -> {
        i.setVu_min(CVI.getVu_min());
        return i;
    }).forEach((i) -> {
        i.setVu_max(CVI.getVu_max());
    });
    // Find the best K by finding the minimum score:
    valIdxSet.stream().filter((i) -> (i.getValIdx() < CVI.getValIdx())).forEach((i) -> {
        CVI = i;
    });
    BufferedWriter datawriter = null;    // DEBUG
    BufferedWriter rawdatawriter = null; // DEBUG
    BufferedWriter clusterwriter = null; // DEBUG
    String filePrefix = "DEBUG-" + iso8601FormatString.format(new Date()); // DEBUG
    try { // DEBUG
        File warmupData = new File(filePrefix + "-first" + D.size() + ".csv");  // DEBUG
        File rawwarmupData = new File(filePrefix + "-raw" + D.size() + ".csv"); // DEBUG
        File clusterData = new File(filePrefix + "-clusters.csv");              // DEBUG
        datawriter = new BufferedWriter(new FileWriter(warmupData));       // DEBUG
        rawdatawriter = new BufferedWriter(new FileWriter(rawwarmupData)); // DEBUG
        clusterwriter = new BufferedWriter(new FileWriter(clusterData));   // DEBUG
        clusterwriter.write("id,s,w,r,e,p,y,c,v"); // DEBUG
        clusterwriter.newLine(); // DEBUG
        String csv = "";  // DEBUG
        int rowCount = 0; // DEBUG
        for (Instance x : D) { // DEBUG
            double[] dataArray = x.toDoubleArray(); // DEBUG
            for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) { // DEBUG
                csv += dataArray[dIdx] + ","; // DEBUG
            } // DEBUG
            csv += ++rowCount;        // DEBUG
            rawdatawriter.write(csv); // DEBUG
            rawdatawriter.newLine();  // DEBUG
            csv = ""; // DEBUG
        } // DEBUG
        for (double uvar : universalVariance) {
            csv += uvar + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine();  // DEBUG
        csv = "";
        for (double umean : universalCentroid) {
            csv += umean + ",";
        }
        rawdatawriter.write(csv); // DEBUG
        rawdatawriter.newLine();  // DEBUG
        csv = "";
        rawdatawriter.flush();
        this.clusters.clear();
        for (Riffle c : CVI.getClustering()) {
            if (c.instances == null || c.instances.isEmpty()) {
                continue;
            }
            double[] clusterCentroid = new double[universalCentroid.length];
            double[] clusterVariance = new double[universalVariance.length];
            for (Instance x : c.instances) {
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
                }
            }
            // The cluster class uses an incremental heuristic, but we want to start out as pure
            // as possible, so we use the 2-Pass method for computing sample variance (per dimension)
            if (c.instances.size() < 2) {
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = universalCluster.getVariances()[i] * 0.85;
                }
            } else {
                double n = c.instances.size();
                double[] cep = new double[universalCentroid.length];
                Arrays.fill(cep, 0);
                for (Instance x : c.instances) {
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = clusterCentroid[i] - xValues[i];
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta;
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
            }
            c.setCenter(clusterCentroid);
            c.setVariances(clusterVariance);
            c.recompute(); // this updates entropies and such
            // WRITE DEBUG DATA
            for (Instance x : c.instances) {
                double[] dataArray = x.toDoubleArray();
                for (int dIdx = 0; dIdx < dataArray.length; ++dIdx) {
                    csv += dataArray[dIdx] + ",";
                }
                csv += c.getId();
                datawriter.write(csv);
                datawriter.newLine();
                csv = "";
            }
            // clusterwriter.write("id,w,r,e,p,y,c,v");
            if (Double.isNaN(c.getRadius())) {
                System.out.print("Bad radius");
            }
            clusterwriter.write(c.getId() + "," + c.size() + "," + c.getWeight() + "," + c.getRadius() + ","
                    + c.getEntropy() + "," + c.getTruePurity() + ","
                    + weka.core.Utils.maxIndex(c.getVotes())
                    + ",Centroid:," + weka.core.Utils.arrayToString(c.getCenter())
                    + ",Var:," + weka.core.Utils.arrayToString(c.getVariances()));
            clusterwriter.newLine();
            // END DEBUG DATA
            this.clusters.add(c);
        }
        if (this.outlierDefinitionStrategyOption.getChosenIndex() == 1) {
            this.setupPerceptron();
            double outlierPerceptronTrainingError = this.trainPerceptron();
            System.out.println("outlier detection Perceptron training error = " + outlierPerceptronTrainingError);
        }
        this.clusters.stream().forEach((c) -> {
            c.instances.clear();
        });
        this.newClusterCreateCalls = 0;
        System.out.println("Starting with " + this.clusters.size() + " clusters and " + this.knownLabels + " labels.");
        clusterwriter.flush(); // DEBUG
        clusterwriter.close(); // DEBUG
        datawriter.flush();    // DEBUG
        datawriter.close();    // DEBUG
        rawdatawriter.flush(); // DEBUG
        rawdatawriter.close(); // DEBUG
    } catch (IOException e) {
    } // DEBUG
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Use inclusion probability to discover the cluster "nearest" the provided instance.
 *
 * @param C set of clusters
 * @param x instance in question
 * @return array of cluster/distance tuples, sorted by the tuple's natural order
 */
protected final NearestClusterTuple[] findMostLikelyClusters(Collection<Riffle> C, Instance x) {
    NearestClusterTuple[] ret = new NearestClusterTuple[C.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    double dist = 0;
    for (Riffle c : C) {
        dist = c.getCenterDistance(xVals);
        ret[idx++] = new NearestClusterTuple(c, dist);
    } // end for
    Arrays.parallelSort(ret);
    return ret;
}
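Arrays.parallelSort on an object array requires mutually Comparable elements, so NearestClusterTuple must order itself by distance. A minimal sketch of such a tuple (DistanceTuple is a hypothetical stand-in, since NearestClusterTuple's internals aren't shown in this listing, and the real class may sort in the opposite direction):

final class DistanceTuple implements Comparable<DistanceTuple> {
    final Riffle cluster;
    final double distance;

    DistanceTuple(Riffle cluster, double distance) {
        this.cluster = cluster;
        this.distance = distance;
    }

    @Override
    public int compareTo(DistanceTuple other) {
        // ascending by distance, so the nearest tuple sorts first
        return Double.compare(this.distance, other.distance);
    }
}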
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Find the nearest neighbors of an instance under the configured distance strategy.
 *
 * @param D instance set to search
 * @param x instance in question
 * @return array of instance/distance tuples, sorted by the tuple's natural order
 */
protected final NearestInstanceTuple[] findNearestNeighbors(Instances D, Instance x) {
    NearestInstanceTuple[] ret = new NearestInstanceTuple[D.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    for (Instance n : D) {
        ret[idx++] = new NearestInstanceTuple(n, VectorDistances.distance(xVals, n.toDoubleArray(), D,
                this.distanceStrategyOption.getChosenIndex()));
    } // end for
    Arrays.parallelSort(ret);
    return ret;
}
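If NearestInstanceTuple sorts ascending by distance, a k-nearest-neighbors query is just a prefix of the returned array. A usage sketch (k is illustrative; NearestInstanceTuple's accessors aren't shown in this listing):

NearestInstanceTuple[] sorted = findNearestNeighbors(D, x);
int k = Math.min(5, sorted.length);
for (int i = 0; i < k; i++) {
    // sorted[i] holds the i-th closest instance to x and its distance
}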