List of usage examples for weka.core.Instance.toDoubleArray()
public double[] toDoubleArray();
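toDoubleArray() returns a fresh double[] holding every attribute value of the instance in Weka's internal representation: numeric values as-is, nominal values as their index codes. Before the examples below, a minimal self-contained sketch of the call itself; the class, relation, and attribute names are made up for illustration:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class ToDoubleArrayDemo {
    public static void main(String[] args) {
        // Hypothetical two-attribute dataset, just to have an Instance to call.
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        Instances data = new Instances("demo", attrs, 1);

        Instance inst = new DenseInstance(1.0, new double[] { 1.5, -2.0 });
        inst.setDataset(data);

        // The returned array is a copy; mutating it does not affect the instance.
        double[] values = inst.toDoubleArray();
        System.out.println(java.util.Arrays.toString(values)); // prints [1.5, -2.0]
    }
}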
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Use inclusion probability to discover the cluster "nearest" the provided instance.
 * Uses the main object's outlier container.
 * @param x instance in question
 * @return sorted array of tuples, ordered by distance
 */
protected final NearestInstanceTuple[] findNearestOutliers(Instance x) {
    NearestInstanceTuple[] ret = new NearestInstanceTuple[potentialNovels.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    for (Instance n : potentialNovels) {
        double distance = VectorDistances.distance(xVals, n.toDoubleArray(), x.dataset(),
                this.distanceStrategyOption.getChosenIndex());
        NearestInstanceTuple nit = new NearestInstanceTuple(n, distance);
        ret[idx++] = nit;
    }
    Arrays.parallelSort(ret);
    return ret;
}
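A note on the sort: Arrays.parallelSort(ret) requires NearestInstanceTuple to be Comparable. That class is not shown on this page; the following is a hypothetical sketch consistent with how the snippet uses it (ordering by distance is an assumption drawn from the javadoc, and the real class in Sieve.java may differ):

// Hypothetical sketch: the snippet sorts these tuples, so they must be
// Comparable; ascending order by distance is assumed.
class NearestInstanceTuple implements Comparable<NearestInstanceTuple> {
    final Instance instance;
    final double distance;

    NearestInstanceTuple(Instance instance, double distance) {
        this.instance = instance;
        this.distance = distance;
    }

    @Override
    public int compareTo(NearestInstanceTuple other) {
        return Double.compare(this.distance, other.distance);
    }
}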
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Uses methodology from Kim et al., "A Novel Validity Index for Determination
 * of the Optimal Number of Clusters".
 * @param D warm-up data set
 */
public final void initialize(List<Instance> D) {
    String ncCSVfilePrefix = "META-" + D.get(0).dataset().relationName() + "-"
            + iso8601FormatString.format(new Date());
    final boolean doMetaLog = logMetaRecordsOption.isSet();
    if (doMetaLog) {
        try {
            File ncCSVFile = new File(ncCSVfilePrefix + ".csv");
            ncCSVwriter = new BufferedWriter(new FileWriter(ncCSVFile));
            String ncCSVHeader = "usize,urad,ctally,cpur,csize,cweight,crad,cdist,"
                    + "pout,vweight,qdmin,qdout,qnsc,novel";
            ncCSVwriter.write(ncCSVHeader);
            ncCSVwriter.newLine();
            ncCSVwriter.flush();
        } catch (IOException fileSetupIOException) {
            System.err.println("NC-CSV meta-data file failed to open: " + fileSetupIOException.toString());
        }
    }
    knownLabels = new int[D.get(0).numClasses()];
    Arrays.fill(knownLabels, 0);
    this.numAttributes = D.get(0).numAttributes();
    universalProbabilitySums = 0;
    bestProbabilitySums = 0;
    bestProbabilityCount = 0;
    // Set up the universal set/cluster. This is crucial for subspace selection
    // (cross-entropy checks against the null hypothesis).
    double[] universalCentroid = new double[D.get(0).numAttributes()];
    double[] universalVariance = new double[D.get(0).numAttributes()];
    Arrays.fill(universalCentroid, 0);
    Arrays.fill(universalVariance, 0);
    universalCluster = new Riffle(D.get(0));
    universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
    if (D.size() > 1) {
        double[] ep = new double[universalCentroid.length];
        Arrays.fill(ep, 0);
        universalCluster.setCenter(universalCentroid);    // temporary: start with a standard Gaussian, updated below
        universalCluster.setVariances(universalVariance); // temporary: start with a standard Gaussian, updated below
        universalCluster.setWeight(0);
        double N = D.size();
        for (Instance x : D) { // Pre-populate the universal cluster with data points
            int y = (int) x.classValue();
            if (y < knownLabels.length) {
                knownLabels[y]++;
            }
            universalCluster.addInstance(x);
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                universalCentroid[i] += xValues[i];
            }
        }
        for (int i = 0; i < universalCentroid.length; ++i) {
            universalCentroid[i] /= N;
        }
        // The cluster class uses an incremental heuristic, but we want to start out
        // as pure as possible, so we use the 2-pass method for computing sample
        // variance (per dimension).
        for (Instance x : D) {
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                double delta = universalCentroid[i] - xValues[i];
                ep[i] += delta;
                universalVariance[i] += delta * delta;
            }
        }
        for (int i = 0; i < universalVariance.length; ++i) {
            universalVariance[i] = (universalVariance[i] - ep[i] * ep[i] / N) / (N - 1);
        }
        universalCluster.setCenter(universalCentroid);
        universalCluster.setVariances(universalVariance);
    }
    universalCluster.recompute(); // this updates entropies and such
    int numKnownLabels = 0;
    for (int y : knownLabels) {
        if (y > 0) {
            numKnownLabels++;
        }
    }
    // Now use k-means to find the initial cluster set:
    int Cmin = this.clustersPerLabelOption.getValue() * numKnownLabels;
    int Cmax = Cmin + 1;
    if (optimizeInitialClusterNumberOption.isSet()) {
        Cmin = this.minimumNumberOfClusterSizeOption.getValue();
        Cmax = Math.max(Cmin + 1, Math.min(this.clustersPerLabelOption.getValue() * numKnownLabels,
                this.maximumNumberOfClusterSizeOption.getValue()));
    }
    ArrayList<ValIdxTupleType> valIdxSet = new ArrayList<>(Cmax);
    Set<Riffle> V;
    // Create multiple hypotheses for the best K choices:
    for (int c = Cmin; c < Cmax; c++) {
        V = batchCluster(D, c, true);
        ValIdxTupleType i = new ValIdxTupleType(V);
        valIdxSet.add(i);
        if (CVI == null) {
            CVI = i;
        } else {
            CVI.setVo_min(Math.min(i.getVo(), CVI.getVo_min()));
            CVI.setVo_max(Math.max(i.getVo(), CVI.getVo_max()));
            CVI.setVu_min(Math.min(i.getVu(), CVI.getVu_min()));
            CVI.setVu_max(Math.max(i.getVu(), CVI.getVu_max()));
        }
    }
    // Normalize all:
    for (ValIdxTupleType i : valIdxSet) {
        i.setVo_min(CVI.getVo_min());
        i.setVo_max(CVI.getVo_max());
        i.setVu_min(CVI.getVu_min());
        i.setVu_max(CVI.getVu_max());
    }
    // Find the best K by finding the minimum score:
    valIdxSet.stream().filter((i) -> (i.getValIdx() < CVI.getValIdx())).forEach((i) -> {
        CVI = i;
    });
    this.clusters.clear();
    for (Riffle c : CVI.getClustering()) {
        if (c.instances == null || c.instances.isEmpty()) {
            continue;
        }
        double[] clusterCentroid = new double[universalCentroid.length];
        double[] clusterVariance = new double[universalVariance.length];
        for (Instance x : c.instances) { // Accumulate the cluster centroid from its data points
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
            }
        }
        // Again use the 2-pass method for the per-dimension sample variance; for
        // singleton clusters fall back to a scaled copy of the universal variance.
        if (c.instances.size() < 2) {
            for (int i = 0; i < clusterVariance.length; ++i) {
                clusterVariance[i] = universalCluster.getVariances()[i] * 0.85;
            }
        } else {
            double n = c.instances.size();
            double[] cep = new double[universalCentroid.length];
            Arrays.fill(cep, 0);
            for (Instance x : c.instances) {
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double delta = clusterCentroid[i] - xValues[i];
                    cep[i] += delta;
                    clusterVariance[i] += delta * delta;
                }
            }
            for (int i = 0; i < clusterVariance.length; ++i) {
                clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
            }
        }
        c.setCenter(clusterCentroid);
        c.setVariances(clusterVariance);
        c.recompute(); // this updates entropies and such
        for (Instance x : c.instances) {
            this.hopperCache.push(new ClusterPointPair(x, c));
        }
        this.clusters.add(c);
    }
    this.newClusterCreateCalls = 0;
    System.out.println("Starting with " + this.clusters.size() + " clusters.");
    instancesSeen = D.size();
    weightsSeen = D.size();
}
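Both variance loops above implement the compensated two-pass algorithm: var = (sum of squared residuals minus (sum of residuals)^2 / n) / (n - 1), where each residual is (mean - x). A minimal standalone sketch of the same formula on a single dimension (the method name is ours, for illustration only):

// Compensated two-pass sample variance, matching the per-dimension
// formula used in initialize() above. The residual sum ep would be
// exactly zero in exact arithmetic; keeping it cancels rounding error.
static double sampleVariance(double[] xs) {
    double n = xs.length;
    double mean = 0.0;
    for (double x : xs) {
        mean += x / n;
    }
    double ep = 0.0;    // sum of residuals
    double sumSq = 0.0; // sum of squared residuals
    for (double x : xs) {
        double delta = mean - x;
        ep += delta;
        sumSq += delta * delta;
    }
    return (sumSq - ep * ep / n) / (n - 1);
}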
From source file:moa.clusterers.clustream.Clustream.java
License:Apache License
@Override
public void trainOnInstanceImpl(Instance instance) {
    int dim = instance.numValues();
    timestamp++;

    // 0. Initialize
    if (!initialized) {
        if (buffer.size() < bufferSize) {
            buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
            return;
        }
        int k = kernels.length;
        assert (k <= bufferSize);
        ClustreamKernel[] centers = new ClustreamKernel[k];
        for (int i = 0; i < k; i++) {
            centers[i] = buffer.get(i); // TODO: make random!
        }
        Clustering kmeans_clustering = kMeans(k, centers, buffer);
        for (int i = 0; i < kmeans_clustering.size(); i++) {
            kernels[i] = new ClustreamKernel(new DenseInstance(1.0, centers[i].getCenter()), dim,
                    timestamp, t, m);
        }
        buffer.clear();
        initialized = true;
        return;
    }

    // 1. Determine the closest kernel
    ClustreamKernel closestKernel = null;
    double minDistance = Double.MAX_VALUE;
    for (int i = 0; i < kernels.length; i++) {
        double distance = distance(instance.toDoubleArray(), kernels[i].getCenter());
        if (distance < minDistance) {
            closestKernel = kernels[i];
            minDistance = distance;
        }
    }

    // 2. Check whether the instance fits into closestKernel
    double radius = 0.0;
    if (closestKernel.getWeight() == 1) {
        // Special case: estimate the radius as the distance to the next closest cluster
        radius = Double.MAX_VALUE;
        double[] center = closestKernel.getCenter();
        for (int i = 0; i < kernels.length; i++) {
            if (kernels[i] == closestKernel) {
                continue;
            }
            double distance = distance(kernels[i].getCenter(), center);
            radius = Math.min(distance, radius);
        }
    } else {
        radius = closestKernel.getRadius();
    }
    if (minDistance < radius) {
        // Data fits: put it into the kernel and be happy
        closestKernel.insert(instance, timestamp);
        return;
    }

    // 3. Data does not fit; we need to free some space to insert a new kernel
    long threshold = timestamp - timeWindow; // Kernels older than this can be forgotten

    // 3.1 Try to forget old kernels
    for (int i = 0; i < kernels.length; i++) {
        if (kernels[i].getRelevanceStamp() < threshold) {
            kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m);
            return;
        }
    }

    // 3.2 Merge the two closest kernels
    int closestA = 0;
    int closestB = 0;
    minDistance = Double.MAX_VALUE;
    for (int i = 0; i < kernels.length; i++) {
        double[] centerA = kernels[i].getCenter();
        for (int j = i + 1; j < kernels.length; j++) {
            double dist = distance(centerA, kernels[j].getCenter());
            if (dist < minDistance) {
                minDistance = dist;
                closestA = i;
                closestB = j;
            }
        }
    }
    assert (closestA != closestB);
    kernels[closestA].add(kernels[closestB]);
    kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m);
}
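The two-argument distance(double[], double[]) helper that this snippet (and the WithKmeans variant below) calls is not shown on this page. In MOA's Clustream it is a plain Euclidean distance; the following is a sketch consistent with how the code above uses it, not necessarily the exact implementation:

// Euclidean distance between two equal-length vectors, as assumed by
// the Clustream snippets on this page.
private static double distance(double[] a, double[] b) {
    double sum = 0.0;
    for (int i = 0; i < a.length; i++) {
        double d = a[i] - b[i];
        sum += d * d;
    }
    return Math.sqrt(sum);
}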
From source file:moa.clusterers.clustream.ClustreamKernel.java
License:Apache License
/**
 * See interface <code>Cluster</code>.
 * @param instance
 * @return inclusion probability (here 1.0 or 0.0)
 */
@Override
public double getInclusionProbability(Instance instance) {
    // Trivial cluster
    if (N == 1) {
        double distance = 0.0;
        for (int i = 0; i < LS.length; i++) {
            double d = LS[i] - instance.value(i);
            distance += d * d;
        }
        distance = Math.sqrt(distance);
        if (distance < EPSILON) {
            return 1.0;
        }
        return 0.0;
    } else {
        double dist = calcNormalizedDistance(instance.toDoubleArray());
        if (dist <= getRadius()) {
            return 1;
        } else {
            return 0;
        }
    }
}
From source file:moa.clusterers.clustream.WithKmeans.java
License:Apache License
@Override
public void trainOnInstanceImpl(Instance instance) {
    int dim = instance.numValues();
    timestamp++;

    // 0. Initialize
    if (!initialized) {
        if (buffer.size() < bufferSize) {
            buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
            return;
        } else {
            for (int i = 0; i < buffer.size(); i++) {
                kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim,
                        timestamp, t, m);
            }
            buffer.clear();
            initialized = true;
            return;
        }
    }

    // 1. Determine the closest kernel
    ClustreamKernel closestKernel = null;
    double minDistance = Double.MAX_VALUE;
    for (int i = 0; i < kernels.length; i++) {
        double distance = distance(instance.toDoubleArray(), kernels[i].getCenter());
        if (distance < minDistance) {
            closestKernel = kernels[i];
            minDistance = distance;
        }
    }

    // 2. Check whether the instance fits into closestKernel
    double radius = 0.0;
    if (closestKernel.getWeight() == 1) {
        // Special case: estimate the radius as the distance to the next closest cluster
        radius = Double.MAX_VALUE;
        double[] center = closestKernel.getCenter();
        for (int i = 0; i < kernels.length; i++) {
            if (kernels[i] == closestKernel) {
                continue;
            }
            double distance = distance(kernels[i].getCenter(), center);
            radius = Math.min(distance, radius);
        }
    } else {
        radius = closestKernel.getRadius();
    }
    if (minDistance < radius) {
        // Data fits: put it into the kernel and be happy
        closestKernel.insert(instance, timestamp);
        return;
    }

    // 3. Data does not fit; we need to free some space to insert a new kernel
    long threshold = timestamp - timeWindow; // Kernels older than this can be forgotten

    // 3.1 Try to forget old kernels
    for (int i = 0; i < kernels.length; i++) {
        if (kernels[i].getRelevanceStamp() < threshold) {
            kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m);
            return;
        }
    }

    // 3.2 Merge the two closest kernels
    int closestA = 0;
    int closestB = 0;
    minDistance = Double.MAX_VALUE;
    for (int i = 0; i < kernels.length; i++) {
        double[] centerA = kernels[i].getCenter();
        for (int j = i + 1; j < kernels.length; j++) {
            double dist = distance(centerA, kernels[j].getCenter());
            if (dist < minDistance) {
                minDistance = dist;
                closestA = i;
                closestB = j;
            }
        }
    }
    assert (closestA != closestB);
    kernels[closestA].add(kernels[closestB]);
    kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m);
}
From source file:moa.clusterers.clustree.ClusKernel.java
License:Apache License
@Override
public double getInclusionProbability(Instance instance) {
    // Trivial cluster
    if (N == 1) {
        double distance = 0.0;
        for (int i = 0; i < LS.length; i++) {
            double d = LS[i] - instance.value(i);
            distance += d * d;
        }
        distance = Math.sqrt(distance);
        if (distance < EPSILON) {
            return 1.0;
        }
        return 0.0;
    } else {
        double dist = calcNormalizedDistance(instance.toDoubleArray());
        if (dist <= getRadius()) {
            return 1;
        } else {
            return 0;
        }
    }
}
From source file:moa.clusterers.clustree.ClusTree.java
License:Apache License
@Override
public void trainOnInstanceImpl(Instance instance) {
    timestamp++;
    // TODO: check whether the instance contains a label
    if (root == null) {
        numberDimensions = instance.numAttributes();
        root = new Node(numberDimensions, 0);
    } else if (numberDimensions != instance.numAttributes()) {
        System.out.println("Wrong dimensionality, expected: " + numberDimensions
                + ", found: " + instance.numAttributes());
    }
    ClusKernel newPointAsKernel = new ClusKernel(instance.toDoubleArray(), numberDimensions);
    insert(newPointAsKernel, new SimpleBudget(1000), timestamp);
}
From source file:moa.clusterers.denstream.MicroCluster.java
License:Apache License
public MicroCluster(Instance instance, int dimensions, long timestamp, double lambda,
        Timestamp currentTimestamp) {
    this(instance.toDoubleArray(), dimensions, timestamp, lambda, currentTimestamp);
}
From source file:moa.clusterers.outliers.AnyOut.util.DataObject.java
License:Apache License
/**
 * Standard constructor for <code>DataObject</code>.
 * @param idCounter the id for the <code>DataObject</code>
 * @param inst the underlying <code>Instance</code>; its feature values are cached as a <code>double[]</code>
 */
public DataObject(int idCounter, Instance inst) {
    this.id = idCounter;
    this.inst = inst;
    this.features = inst.toDoubleArray();
    this.classLabel = (int) inst.classValue();
    this.isOutiler = false;
}
From source file:moa.evaluation.CMM_GTAnalysis.java
License:Apache License
/**
 * Calculates the Euclidean distance between two points.
 * @param inst1 first point as an <code>Instance</code>
 * @param inst2 second point as an <code>Instance</code>
 * @return Euclidean distance
 */
private double distance(Instance inst1, Instance inst2) {
    return distance(inst1, inst2.toDoubleArray());
}