Example usage for weka.core Instance toDoubleArray

List of usage examples for weka.core Instance toDoubleArray

Introduction

On this page you can find example usages of the weka.core Instance method toDoubleArray.

Prototype

public double[] toDoubleArray();

Source Link

Document

Returns the values of each attribute as an array of doubles.

Usage

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Measures how far every entry of {@code potentialNovels} lies from the provided instance and
 * returns the entries in sorted order.
 * <p>
 * Distances come from {@code VectorDistances.distance}, evaluated against the dataset of {@code x}
 * with the currently chosen distance strategy. Sorting relies on the natural ordering of
 * {@code NearestInstanceTuple} (presumably ascending distance; confirm against its {@code compareTo}).
 *
 * @param x instance in question
 * @return one tuple per entry of the outlier container, sorted
 */
protected final NearestInstanceTuple[] findNearestOutliers(Instance x) {
    final NearestInstanceTuple[] ranked = new NearestInstanceTuple[potentialNovels.size()];
    final double[] query = x.toDoubleArray();
    int next = 0;
    for (Instance candidate : potentialNovels) {
        final double gap = VectorDistances.distance(query, candidate.toDoubleArray(), x.dataset(),
                this.distanceStrategyOption.getChosenIndex());
        ranked[next++] = new NearestInstanceTuple(candidate, gap);
    }
    Arrays.parallelSort(ranked);
    return ranked;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Builds the initial model from a warm-up batch.
 * <p>
 * Steps, in order: optionally opens the "META-..." CSV log and writes its header; counts the class
 * labels seen in {@code D}; fits the universal cluster (per-attribute mean and two-pass sample
 * variance over all of {@code D}); runs {@code batchCluster} once per candidate cluster count and
 * keeps the clustering with the smallest validity index; recomputes the centroid and variance of
 * every retained, non-empty cluster from its members and registers it in {@code clusters}.
 * <p>
 * Uses methodology from Kim et al. "A Novel Validity Index for Determination of the Optimal Number of Clusters"
 *
 * @param D Warm-up data set; must hold at least one instance because {@code D.get(0)} is read unconditionally
 */
public final void initialize(List<Instance> D) {
    String ncCSVfilePrefix = "META-" + D.get(0).dataset().relationName() + "-"
            + iso8601FormatString.format(new Date());
    final boolean doMetaLog = logMetaRecordsOption.isSet();
    if (doMetaLog) {
        try {
            // The writer is kept in the ncCSVwriter field and is not closed in this method
            // (presumably closed elsewhere - TODO confirm).
            File ncCSVFile = new File(ncCSVfilePrefix + ".csv");
            ncCSVwriter = new BufferedWriter(new FileWriter(ncCSVFile));
            String ncCSVHeader = "" + "usize" + "," + "urad" + "," + "ctally" + "," + "cpur" + "," + "csize"
                    + "," + "cweight" + "," + "crad" + "," + "cdist" + "," + "pout" + "," + "vweight" + ","
                    + "qdmin" + "," + "qdout" + "," + "qnsc" + "," + "novel";
            ncCSVwriter.write(ncCSVHeader);
            ncCSVwriter.newLine();
            ncCSVwriter.flush();
        } catch (IOException fileSetupIOException) {
            // Best effort: a failure to open the log is reported and initialization continues.
            System.err.println("NC-CSV meta-data file failed to open: " + fileSetupIOException.toString());
        }
    }
    // Per-label occurrence counters, one slot per class of the first instance's dataset.
    knownLabels = new int[D.get(0).numClasses()];
    Arrays.fill(knownLabels, 0);
    this.numAttributes = D.get(0).numAttributes();
    universalProbabilitySums = 0;
    bestProbabilitySums = 0;
    bestProbabilityCount = 0;
    // Setup the universal set/cluster. Note that this will be crucial for subspace selection (cross-entropy checks against null hypothesis)
    double[] universalCentroid = new double[D.get(0).numAttributes()];
    double[] universalVariance = new double[D.get(0).numAttributes()];
    Arrays.fill(universalCentroid, 0);
    Arrays.fill(universalVariance, 0);
    universalCluster = new Riffle(D.get(0));
    //universalCluster.updateStrategyOption.setChosenIndex(this.updateStrategyOption.getChosenIndex());
    //universalCluster.outlierDefinitionStrategyOption.setChosenIndex(this.outlierDefinitionStrategyOption.getChosenIndex());
    universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
    //universalCluster.initialStandardDeviationOption.setValue(this.initialStandardDeviationOption.getValue());
    //universalCluster.alphaAdjustmentWeightOption.setValue(this.learningRateAlphaOption.getValue());
    //universalCluster.setParentClusterer(this);
    if (D.size() > 1) {
        // ep accumulates the per-attribute sum of deviations; used as the rounding-error
        // correction term of the two-pass variance below.
        double[] ep = new double[universalCentroid.length];
        Arrays.fill(ep, 0);
        universalCluster.setCenter(universalCentroid); // temporary all-zero placeholder, replaced after the passes below
        universalCluster.setVariances(universalVariance); // temporary all-zero placeholder, replaced after the passes below
        universalCluster.setWeight(0);
        double N = D.size();
        for (Instance x : D) { // Pre-populate universal cluster with data points
            int y = (int) x.classValue();
            if (y < knownLabels.length) {
                knownLabels[y]++;
            }
            universalCluster.addInstance(x);
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                universalCentroid[i] += xValues[i];
            }
        }
        // Pass 1 result: turn the per-attribute sums into means.
        for (int i = 0; i < universalCentroid.length; ++i) {
            universalCentroid[i] /= N;
        }
        // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
        // we use the 2-Pass method for computing sample variance (per dimension)
        for (Instance x : D) {
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                double delta = universalCentroid[i] - xValues[i];
                ep[i] += delta;
                universalVariance[i] += delta * delta;
            }
        }
        // Corrected two-pass formula: (sum(delta^2) - (sum(delta))^2 / N) / (N - 1).
        for (int i = 0; i < universalVariance.length; ++i) {
            universalVariance[i] = (universalVariance[i] - ep[i] * ep[i] / N) / (N - 1);
        }
        universalCluster.setCenter(universalCentroid); // final batch mean computed above
        universalCluster.setVariances(universalVariance); // final batch sample variance computed above
    }
    universalCluster.recompute(); // this updates entropies and such
    // Count how many distinct labels actually occurred in the warm-up data.
    int numKnownLabels = 0;
    for (int y : knownLabels) {
        if (y > 0) {
            numKnownLabels++;
        }
    }
    // Ok, now let's use K-Means to find the initial cluster set
    // Default: a single candidate K (Cmin); with the optimize option set, the half-open range [Cmin, Cmax) is searched.
    int Cmin = this.clustersPerLabelOption.getValue() * numKnownLabels;
    int Cmax = Cmin + 1;
    if (optimizeInitialClusterNumberOption.isSet()) {
        Cmin = this.minimumNumberOfClusterSizeOption.getValue();//Math.max(knownLabels.size(), 2);
        Cmax = Math.max(Cmin + 1, Math.min(this.clustersPerLabelOption.getValue() * numKnownLabels,
                this.maximumNumberOfClusterSizeOption.getValue()));
    }
    ArrayList<ValIdxTupleType> valIdxSet = new ArrayList<>(Cmax);
    Set<Riffle> V;
    // Create multiple hypothesis for best K choices:
    for (int c = Cmin; c < Cmax; c++) {
        V = batchCluster(D, c, true);
        ValIdxTupleType i = new ValIdxTupleType(V);
        valIdxSet.add(i);
        // NOTE(review): CVI is not declared in this method; if it is already non-null on entry
        // (e.g. initialize called twice) the first candidate only widens its min/max - confirm intended.
        if (CVI == null) {
            CVI = i;
        } else {
            // Track the global min/max of Vo and Vu across all candidates on the current CVI.
            CVI.setVo_min(Math.min(i.getVo(), CVI.getVo_min()));
            CVI.setVo_max(Math.max(i.getVo(), CVI.getVo_max()));
            CVI.setVu_min(Math.min(i.getVu(), CVI.getVu_min()));
            CVI.setVu_max(Math.max(i.getVu(), CVI.getVu_max()));
        }
    }

    // Normalize all: give every candidate the same global Vo/Vu bounds.
    for (ValIdxTupleType i : valIdxSet) {
        i.setVo_min(CVI.getVo_min());
        i.setVo_max(CVI.getVo_max());
        i.setVu_min(CVI.getVu_min());
        i.setVu_max(CVI.getVu_max());
    }

    // Find the best K by finding the minimum score:
    // the sequential stream tests each element against the current CVI and the forEach replaces CVI,
    // so this behaves as a running-minimum scan.
    valIdxSet.stream().filter((i) -> (i.getValIdx() < CVI.getValIdx())).forEach((i) -> {
        CVI = i;
    });

    this.clusters.clear();
    for (Riffle c : CVI.getClustering()) {
        // Clusters that received no instances are dropped.
        if (c.instances == null || c.instances.isEmpty()) {
            continue;
        }
        double[] clusterCentroid = new double[universalCentroid.length];
        double[] clusterVariance = new double[universalVariance.length];
        for (Instance x : c.instances) { // Accumulate the per-attribute mean of this cluster's members
            double[] xValues = x.toDoubleArray();
            for (int i = 0; i < xValues.length; ++i) {
                clusterCentroid[i] += xValues[i] / ((double) c.instances.size());
            }
        }
        // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
        // we use the 2-Pass method for computing sample variance (per dimension)
        if (c.instances.size() < 2) {
            for (int i = 0; i < clusterVariance.length; ++i) {
                clusterVariance[i] = universalCluster.getVariances()[i] * 0.85; // Singleton: fall back to 85% of the universal cluster's variance
            }
        } else {
            double n = c.instances.size();
            // cep: per-attribute sum of deviations, the correction term of the two-pass formula.
            double[] cep = new double[universalCentroid.length];
            Arrays.fill(cep, 0);
            for (Instance x : c.instances) {
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double delta = clusterCentroid[i] - xValues[i];
                    cep[i] += delta;
                    clusterVariance[i] += delta * delta; // Statistical Variance
                }
            }
            for (int i = 0; i < clusterVariance.length; ++i) {
                clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
            }
        }
        c.setCenter(clusterCentroid); // batch mean of the cluster's members
        c.setVariances(clusterVariance);
        c.recompute(); // this updates entropies and such
        // Record every (instance, cluster) assignment in the hopper cache.
        for (Instance x : c.instances) {
            this.hopperCache.push(new ClusterPointPair(x, c));
        }
        this.clusters.add(c);
    }

    this.newClusterCreateCalls = 0;
    System.out.println("Starting with " + this.clusters.size() + " clusters.");
    instancesSeen = D.size();
    weightsSeen = D.size();
}

From source file:moa.clusterers.clustream.Clustream.java

License:Apache License

/**
 * Feeds one instance into the CluStream micro-cluster model.
 * <p>
 * While the model is uninitialized, instances are buffered; once the buffer is full the kernels are
 * seeded from a k-means run over the buffer. Afterwards each instance is either absorbed by its
 * closest kernel, replaces a kernel whose relevance stamp is older than the time window, or
 * triggers a merge of the two closest kernels to make room for a new one.
 *
 * @param instance the instance to learn from
 */
@Override
public void trainOnInstanceImpl(Instance instance) {
    final int dim = instance.numValues();
    timestamp++;

    // 0. Initialize
    if (!initialized) {
        if (buffer.size() < bufferSize) {
            // Still warming up: keep the point as a single-instance kernel.
            buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
            return;
        }

        final int k = kernels.length;
        assert (k <= bufferSize);

        final ClustreamKernel[] seeds = new ClustreamKernel[k];
        for (int s = 0; s < k; s++) {
            seeds[s] = buffer.get(s); // TODO: make random!
        }
        final Clustering seeded = kMeans(k, seeds, buffer);

        for (int s = 0; s < seeded.size(); s++) {
            kernels[s] = new ClustreamKernel(new DenseInstance(1.0, seeds[s].getCenter()), dim, timestamp, t,
                    m);
        }

        buffer.clear();
        initialized = true;
        return;
    }

    // 1. Determine closest kernel
    ClustreamKernel nearest = null;
    double nearestDist = Double.MAX_VALUE;
    for (ClustreamKernel candidate : kernels) {
        final double d = distance(instance.toDoubleArray(), candidate.getCenter());
        if (d < nearestDist) {
            nearest = candidate;
            nearestDist = d;
        }
    }

    // 2. Check whether instance fits into the closest kernel
    final double radius;
    if (nearest.getWeight() != 1) {
        radius = nearest.getRadius();
    } else {
        // Special case: a single-point kernel has no usable radius, so estimate it
        // by the distance to the next closest kernel.
        double neighbourGap = Double.MAX_VALUE;
        final double[] center = nearest.getCenter();
        for (ClustreamKernel other : kernels) {
            if (other != nearest) {
                final double d = distance(other.getCenter(), center);
                neighbourGap = Math.min(d, neighbourGap);
            }
        }
        radius = neighbourGap;
    }

    if (nearestDist < radius) {
        // Data fits, put into kernel and be happy
        nearest.insert(instance, timestamp);
        return;
    }

    // 3. Data does not fit, we need to free
    // some space to insert a new kernel
    final long threshold = timestamp - timeWindow; // Kernels before this can be forgotten

    // 3.1 Try to forget old kernels
    for (int slot = 0; slot < kernels.length; slot++) {
        if (kernels[slot].getRelevanceStamp() < threshold) {
            kernels[slot] = new ClustreamKernel(instance, dim, timestamp, t, m);
            return;
        }
    }

    // 3.2 Merge closest two kernels
    int mergeInto = 0;
    int mergeFrom = 0;
    double smallestGap = Double.MAX_VALUE;
    for (int a = 0; a < kernels.length; a++) {
        final double[] centerA = kernels[a].getCenter();
        for (int b = a + 1; b < kernels.length; b++) {
            final double gap = distance(centerA, kernels[b].getCenter());
            if (gap < smallestGap) {
                smallestGap = gap;
                mergeInto = a;
                mergeFrom = b;
            }
        }
    }
    assert (mergeInto != mergeFrom);

    // The freed slot receives a fresh kernel for the new instance.
    kernels[mergeInto].add(kernels[mergeFrom]);
    kernels[mergeFrom] = new ClustreamKernel(instance, dim, timestamp, t, m);
}

From source file:moa.clusterers.clustream.ClustreamKernel.java

License:Apache License

/**
 * See interface <code>Cluster</code>
 * @param point/*from w w  w .j a  v a  2  s . c  om*/
 * @return
 */
@Override
public double getInclusionProbability(Instance instance) {
    //trivial cluster
    if (N == 1) {
        double distance = 0.0;
        for (int i = 0; i < LS.length; i++) {
            double d = LS[i] - instance.value(i);
            distance += d * d;
        }
        distance = Math.sqrt(distance);
        if (distance < EPSILON)
            return 1.0;
        return 0.0;
    } else {
        double dist = calcNormalizedDistance(instance.toDoubleArray());
        if (dist <= getRadius()) {
            return 1;
        } else {
            return 0;
        }
        //            double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length);
        //            return res;
    }
}

From source file:moa.clusterers.clustream.WithKmeans.java

License:Apache License

/**
 * Feeds one instance into the micro-cluster model.
 * <p>
 * Until the buffer is full, instances are only buffered; the call that finds the buffer full turns
 * every buffered kernel into an initial kernel and returns. Afterwards each instance is either
 * absorbed by its closest kernel, replaces a kernel whose relevance stamp is older than the time
 * window, or triggers a merge of the two closest kernels to make room for a new one.
 *
 * @param instance the instance to learn from
 */
@Override
public void trainOnInstanceImpl(Instance instance) {
    final int dim = instance.numValues();
    timestamp++;

    // 0. Initialize
    if (!initialized) {
        if (buffer.size() < bufferSize) {
            buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
            return;
        }
        // Buffer is full: each buffered kernel's center seeds one kernel slot.
        for (int slot = 0; slot < buffer.size(); slot++) {
            final DenseInstance centerPoint = new DenseInstance(1.0, buffer.get(slot).getCenter());
            kernels[slot] = new ClustreamKernel(centerPoint, dim, timestamp, t, m);
        }

        buffer.clear();
        initialized = true;
        return;
    }

    // 1. Determine closest kernel
    ClustreamKernel best = null;
    double bestDistance = Double.MAX_VALUE;
    for (ClustreamKernel kernel : kernels) {
        final double current = distance(instance.toDoubleArray(), kernel.getCenter());
        if (current < bestDistance) {
            best = kernel;
            bestDistance = current;
        }
    }

    // 2. Check whether instance fits into the closest kernel
    final double radius;
    if (best.getWeight() != 1) {
        radius = best.getRadius();
    } else {
        // Special case: estimate radius by determining the distance to the
        // next closest cluster
        double estimate = Double.MAX_VALUE;
        final double[] center = best.getCenter();
        for (ClustreamKernel other : kernels) {
            if (other == best) {
                continue;
            }
            final double toOther = distance(other.getCenter(), center);
            estimate = Math.min(toOther, estimate);
        }
        radius = estimate;
    }

    if (bestDistance < radius) {
        // Data fits, put into kernel and be happy
        best.insert(instance, timestamp);
        return;
    }

    // 3. Data does not fit, we need to free
    // some space to insert a new kernel
    final long threshold = timestamp - timeWindow; // Kernels before this can be forgotten

    // 3.1 Try to forget old kernels
    for (int slot = 0; slot < kernels.length; slot++) {
        if (kernels[slot].getRelevanceStamp() < threshold) {
            kernels[slot] = new ClustreamKernel(instance, dim, timestamp, t, m);
            return;
        }
    }

    // 3.2 Merge closest two kernels
    int keep = 0;
    int absorb = 0;
    double tightest = Double.MAX_VALUE;
    for (int first = 0; first < kernels.length; first++) {
        final double[] firstCenter = kernels[first].getCenter();
        for (int second = first + 1; second < kernels.length; second++) {
            final double pairDistance = distance(firstCenter, kernels[second].getCenter());
            if (pairDistance < tightest) {
                tightest = pairDistance;
                keep = first;
                absorb = second;
            }
        }
    }
    assert (keep != absorb);

    // Fold the absorbed kernel into its partner, then reuse its slot for the new instance.
    kernels[keep].add(kernels[absorb]);
    kernels[absorb] = new ClustreamKernel(instance, dim, timestamp, t, m);
}

From source file:moa.clusterers.clustree.ClusKernel.java

License:Apache License

/**
 * Decides whether the given instance belongs to this kernel, as a hard 0/1 answer.
 * <p>
 * When {@code N} is not 1, the instance is included if its normalized distance does not exceed
 * {@code getRadius()}. When {@code N == 1}, it is included only if its Euclidean distance to
 * {@code LS} is below {@code EPSILON}.
 *
 * @param instance the point to test
 * @return 1 when the instance is included, 0 otherwise
 */
@Override
public double getInclusionProbability(Instance instance) {
    if (N != 1) {
        final double normalized = calcNormalizedDistance(instance.toDoubleArray());
        return (normalized <= getRadius()) ? 1 : 0;
    }

    // Trivial cluster: plain Euclidean distance to the stored point.
    double sumOfSquares = 0.0;
    for (int dim = 0; dim < LS.length; dim++) {
        final double diff = LS[dim] - instance.value(dim);
        sumOfSquares += diff * diff;
    }
    return (Math.sqrt(sumOfSquares) < EPSILON) ? 1.0 : 0.0;
}

From source file:moa.clusterers.clustree.ClusTree.java

License:Apache License

/**
 * Inserts one instance into the tree as a new kernel.
 * <p>
 * The first instance fixes the dimensionality and creates the root node. A later instance with a
 * different attribute count only produces a console message; it is still inserted.
 *
 * @param instance the instance to learn from
 */
@Override
public void trainOnInstanceImpl(Instance instance) {
    timestamp++;

    //TODO check if instance contains label
    final int incomingDims = instance.numAttributes();
    if (root != null) {
        if (incomingDims != numberDimensions) {
            System.out.println(
                    "Wrong dimensionality, expected:" + numberDimensions + "found:" + incomingDims);
        }
    } else {
        // First instance seen: adopt its dimensionality and create the root.
        numberDimensions = incomingDims;
        root = new Node(numberDimensions, 0);
    }

    final ClusKernel pointKernel = new ClusKernel(instance.toDoubleArray(), numberDimensions);
    insert(pointKernel, new SimpleBudget(1000), timestamp);
}

From source file:moa.clusterers.denstream.MicroCluster.java

License:Apache License

/**
 * Creates a micro-cluster from an instance by delegating to the array-based constructor with the
 * instance's attribute values ({@code instance.toDoubleArray()}).
 *
 * @param instance instance whose attribute values form the initial point
 * @param dimensions forwarded unchanged to the array-based constructor
 * @param timestamp forwarded unchanged to the array-based constructor
 * @param lambda forwarded unchanged to the array-based constructor
 * @param currentTimestamp forwarded unchanged to the array-based constructor
 */
public MicroCluster(Instance instance, int dimensions, long timestamp, double lambda,
        Timestamp currentTimestamp) {
    this(instance.toDoubleArray(), dimensions, timestamp, lambda, currentTimestamp);
}

From source file:moa.clusterers.outliers.AnyOut.util.DataObject.java

License:Apache License

/**
 * Standard constructor for <code>DataObject</code>.
 * @param idCounter The id for the <code>DataObject</code>.
 * @param features The feature as a <code>double[]</code>
 * @param classLabel The label id for the <code>DataObject</code>.
 *///  w w  w . j  a va 2s  . c om
public DataObject(int idCounter, Instance inst) {
    this.id = idCounter;
    this.inst = inst;
    this.features = inst.toDoubleArray();
    this.classLabel = (int) inst.classValue();
    this.isOutiler = false;
}

From source file:moa.evaluation.CMM_GTAnalysis.java

License:Apache License

/**
 * Calculates Euclidian distance /*  w  w  w . j  av a 2  s  .c om*/
 * @param inst1 point as double array
 * @param inst2 point as double array
 * @return euclidian distance
 */
private double distance(Instance inst1, Instance inst2) {
    return distance(inst1, inst2.toDoubleArray());

}