Example usage for weka.core Instance dataset

List of usage examples for weka.core Instance dataset

Introduction

On this page you can find example usages of weka.core Instance dataset().

Prototype

public Instances dataset();

Source Link

Document

Returns the dataset this instance has access to.

Usage

From source file:moa.classifiers.rules.AbstractAMRules.java

License:Apache License

/**
 * Prints diagnostic information (dataset name and rule-ordering mode)
 * to standard output when the verbosity level is at least 5.
 *
 * @param inst the instance whose dataset's relation name is reported
 */
protected void VerboseToConsole(Instance inst) {
    if (VerbosityOption.getValue() < 5) {
        return; // below the verbosity threshold: print nothing
    }
    System.out.println();
    System.out.println("I) Dataset: " + inst.dataset().relationName());
    if (this.unorderedRulesOption.isSet()) {
        System.out.println("I) Method Unordered");
    } else {
        System.out.println("I) Method Ordered");
    }
}

From source file:moa.classifiers.trees.ePTTD.java

License:Creative Commons License

@Override
public void trainOnInstanceImpl(Instance inst) {
    // Track the total training weight seen by the model.
    final double w = inst.weight();
    if (w > 0.0) {
        this.trainingWeightSeenByModel += w;
    }

    if (isClassificationEnabled) {
        // Warm-up finished: train the internal classifier incrementally.
        ePTTDintern.updateClassifier(inst);
        return;
    }

    // Still collecting the initial warm-up window.
    if (instancesBuffer == null) {
        // Copy only the header (0 capacity hint); instances are added below.
        this.instancesBuffer = new Instances(inst.dataset(), 0);
    }
    instancesBuffer.add(inst);

    if (instancesBuffer.size() == widthInitOption.getValue()) {
        // Window full: validate options and build the classifier once.
        checkOptionsIntegity();
        this.ePTTDintern.buildClassifier(instancesBuffer);
        isClassificationEnabled = true;
    }
}

From source file:moa.classifiers.WEKAClassifier.java

License:Open Source License

/**
 * Trains the wrapped WEKA classifier on one instance. Updateable
 * classifiers are trained incrementally; batch classifiers are (re)built
 * from a buffered window whose size/frequency is controlled by
 * widthInitOption, widthOption and sampleFrequencyOption.
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    try {
        if (numberInstances == 0) {
            // First instance ever: create the buffer from the dataset header.
            this.instancesBuffer = new Instances(inst.dataset());
            if (classifier instanceof UpdateableClassifier) {
                // NOTE(review): built on an (empty) buffer — presumably the
                // updateable classifier only needs the header here; confirm.
                classifier.buildClassifier(instancesBuffer);
                this.isClassificationEnabled = true;
            } else {
                // Batch classifier: start buffering instances for a later build.
                this.isBufferStoring = true;
            }
        }
        numberInstances++;

        if (classifier instanceof UpdateableClassifier) {
            if (numberInstances > 0) {
                ((UpdateableClassifier) classifier).updateClassifier(inst);
            }
        } else {
            if (numberInstances == widthInitOption.getValue()) {
                //Build first time Classifier
                buildClassifier();
                isClassificationEnabled = true;
                //Continue to store instances
                if (sampleFrequencyOption.getValue() != 0) {
                    isBufferStoring = true;
                }
            }
            if (widthOption.getValue() == 0) {
                //Used from SingleClassifierDrift
                if (isBufferStoring == true) {
                    instancesBuffer.add(inst);
                }
            } else {
                //Used from WekaClassifier without using SingleClassifierDrift
                // Position within the current sampling period (whole count if
                // the sampling frequency is disabled, i.e. 0).
                int numInstances = numberInstances % sampleFrequencyOption.getValue();
                if (sampleFrequencyOption.getValue() == 0) {
                    numInstances = numberInstances;
                }
                if (numInstances == 0) {
                    //Begin to store instances
                    isBufferStoring = true;
                }
                if (isBufferStoring == true && numInstances <= widthOption.getValue()) {
                    //Store instances
                    instancesBuffer.add(inst);
                }
                if (numInstances == widthOption.getValue()) {
                    //Build Classifier
                    buildClassifier();
                    isClassificationEnabled = true;
                    // Start a fresh buffer (header only) for the next window.
                    this.instancesBuffer = new Instances(inst.dataset());
                }
            }
        }
    } catch (Exception e) {
        // NOTE(review): exception is reported but not rethrown — training
        // failures are silently skipped; confirm this is intended.
        System.err.println("Training: " + e.getMessage());
    }
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Use outlier-criteria (selectable strategy) for determining if a data point
 * is an outlier (unlikely member) of this cluster.
 *
 * @param x Instance for comparison to see if it is an outlier to this cluster
 * @return true if x is an outlier w.r.t. this cluster
 */
public final boolean isOutlier(Instance x) {
    boolean ret;
    double p = this.getInclusionProbability(x);
    switch (this.outlierDefinitionStrategyOption.getChosenIndex()) {
    case 0: //Use Chauvenet's Criteria to determine outlier standing of the data point for this cluster.
        ret = (p < getChauvenetLimit());
        break;
    case 1: // use Perceptron vote for the synthetic "outlier" label
        double[] v = embeddedClassifier.getVotesForInstance(x);
        try {
            weka.core.Utils.normalize(v);
        } catch (Exception ignored) {
            // normalize throws when the votes sum to zero; proceed with raw votes
        }
        int oIdx = x.dataset().classAttribute().indexOfValue(AbstractNovelClassClassifier.OUTLIER_LABEL_STR);
        // FIX: indexOfValue returns -1 when the label is absent, and the vote
        // array can be shorter than oIdx; the original indexed v[oIdx] without
        // a lower-bound check and could throw ArrayIndexOutOfBoundsException.
        boolean hasOutlierSlot = (oIdx >= 0) && (oIdx < v.length);
        double po = hasOutlierSlot ? v[oIdx] : 0;
        if (po <= 0 && hasOutlierSlot) {
            v[oIdx] = 0;
        }
        int h = weka.core.Utils.maxIndex(v);
        double ph = v[h];
        double margin = (po - ph);
        // Outlier when the outlier vote beats the best label vote by a margin.
        ret = (po > ph) && (margin > (2.0 / v.length));
        break;
    case 2: // 2.5 sigma
        ret = (p < weka.core.FastStats.normalProbability(2.5));
        break;
    case 3: // 3 sigma
        ret = (p < weka.core.FastStats.normalProbability(3));
        break;
    case 4: // 6 sigma
        ret = (p < weka.core.FastStats.normalProbability(6));
        break;
    case 5: // cheat
        ret = p > 0.5;
        break;
    default: // fall back to the 2.5-sigma criterion
        ret = p < weka.core.FastStats.normalProbability(2.5);
    }
    return ret;
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Sanity check and initialization of dynamic fields.
 * Lazily allocates the centroid, per-attribute frequency/variance/entropy
 * arrays, label counters and the internal header the first time an
 * instance is seen.
 *
 * @param x the instance whose header and shape drive the initialization
 */
protected final void safeInit(Instance x) {
    if (this.embeddedLearnerOption.getValueAsCLIString().contains("Majority class")) {
        // Majority-class learners produce no meaningful outlier votes.
        this.excludeOutlierVoting = true;
    }
    if (centroid == null) {
        // Seed the centroid with the first instance's values.
        centroid = x.toDoubleArray();
    }
    if (this.instances == null) {
        prepareEmbeddedClassifier();
        ArrayList<Attribute> attribs = new ArrayList<>();
        // One frequency table per attribute; null for non-symbolic attributes.
        this.symbolFrequencies = new double[x.dataset().numAttributes()][];
        for (int i = 0; i < x.dataset().numAttributes(); ++i) {
            Attribute a = (Attribute) x.dataset().attribute(i).copy();
            // The class attribute is excluded from distance computations
            // via a zero weight; all others count fully.
            if (i == x.classIndex()) {
                a.setWeight(0.0);
            } else {
                a.setWeight(1.0);
            }
            switch (a.type()) {
            case Attribute.STRING:
            case Attribute.NOMINAL:
                //UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = new double[a.numValues()];
                break;
            case Attribute.NUMERIC:
            case Attribute.RELATIONAL:
            case Attribute.DATE:
            default:
                // UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = null;
            }
            attribs.add(a);
        }
        this.instances = new Instances("ClusterData", attribs, 1);
        this.instances.setClassIndex(x.classIndex());

    }
    //        else {
    //            for (int i = 0; i < x.dataset().numAttributes() && i < this.header.numAttributes(); ++i) {
    //                double val = x.value(i);
    //                Attribute a = this.header.attribute(i);
    //                // expand range as necessary
    //                if (val < a.getLowerNumericBound() || val > a.getUpperNumericBound()){
    //                    UnsafeUtils.setAttributeRange(a, Math.min(val,a.getLowerNumericBound()), Math.max(val,a.getUpperNumericBound()));
    //                }
    //                // increase frequency counts if new string value is encountered
    //                if (a.type() == Attribute.STRING && (val >= Math.max(this.symbolFrequencies[i].length, a.numValues()))) {
    //                    double newArray[] = new double[Math.max(this.symbolFrequencies[i].length, a.numValues())];
    //                    Arrays.fill(newArray, 0);
    //                    for(int j = 0; j <= this.symbolFrequencies[i].length; j++) {
    //                        newArray[j] = this.symbolFrequencies[i][j];
    //                    }
    //                    this.symbolFrequencies[i] = newArray;
    //                }
    //            }
    //        }
    // Per-attribute statistics start at unit variance / zero entropy.
    if (this.variances == null) {
        this.variances = new double[x.numAttributes()];
        Arrays.fill(this.variances, 1);
    }
    if (this.entropies == null) {
        this.entropies = new double[x.numAttributes()];
        Arrays.fill(this.entropies, 0);
    }
    // Predicted-label and ground-truth-label counters, one slot per class.
    if (this.labelFrequencies == null) {
        this.labelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.labelFrequencies, 0);
    }
    if (this.gtLabelFrequencies == null) {
        this.gtLabelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.gtLabelFrequencies, 0);
    }
    if (this.rho == null) {
        this.rho = new double[x.numAttributes()];
        Arrays.fill(this.rho, 0);
    }
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Use inclusion probability to discover the cluster "nearest" the provided instance
 * Uses main object's outlier container/*from  w  w  w.ja  v a  2 s .co m*/
 * @param x instance in question
 * @return sorted set of clusters, ordered by inc
 */
protected final NearestInstanceTuple[] findNearestOutliers(Instance x) {
    NearestInstanceTuple[] ret = new NearestInstanceTuple[potentialNovels.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    for (Instance n : potentialNovels) {
        double distance = VectorDistances.distance(xVals, n.toDoubleArray(), x.dataset(),
                this.distanceStrategyOption.getChosenIndex());
        NearestInstanceTuple nit = new NearestInstanceTuple(n, distance);
        ret[idx++] = nit;
    } // end for
    Arrays.parallelSort(ret);
    return ret;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * In cases where this class is not used by the moa.tasks.EvaluateNonStationaryDynamicStream task, this safety
 * (fallback) initialization procedure is necessary. Lazily creates the
 * universal cluster, the known-label counters and the augmented header.
 *
 * @param x the instance whose header and shape drive the initialization
 */
public final void safeInit(Instance x) {
    if (this.universalCluster == null) {
        // Build and configure the all-encompassing cluster, then publish it.
        final Riffle universe = new Riffle(x);
        universe.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
        final double[] unitVariances = new double[x.numAttributes()];
        Arrays.fill(unitVariances, 1.0);
        universe.setVariances(unitVariances);
        universe.setWeight(0);
        universe.recompute();
        universalCluster = universe;
        bestProbabilitySums = 0;
        bestProbabilityCount = 0;
    }
    if (this.knownLabels == null) {
        // One observation counter per class label.
        this.knownLabels = new int[x.numClasses()];
        Arrays.fill(knownLabels, 0);
        this.numAttributes = x.numAttributes();
    }
    if (this.header == null) {
        this.header = AbstractNovelClassClassifier.augmentInstances(x.dataset());
    }
}

From source file:moa.evaluation.BasicClassificationPerformanceEvaluator.java

License:Open Source License

/**
 * Accumulates accuracy and kappa statistics for one prediction, plus a
 * "no-change" baseline that always repeats the previously seen label.
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    final double w = inst.weight();
    final int actualClass = (int) inst.classValue();
    if (w > 0.0) {
        if (this.weightObserved == 0) {
            // First weighted instance: size the statistics arrays.
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += w;
        final int predicted = Utils.maxIndex(classVotes);
        if (predicted == actualClass) {
            this.weightCorrect += w;
        }
        this.rowKappa[predicted] += w;
        this.columnKappa[actualClass] += w;
    }
    // No-change baseline: credit when repeating the last label would be right.
    if (actualClass == this.lastSeenClass) {
        this.weightCorrectNoChangeClassifier += w;
    }
    this.lastSeenClass = actualClass;
}

From source file:moa.evaluation.BasicClassificationScoringEvaluator.java

License:Open Source License

/**
 * Accumulates accuracy, kappa and a score-based (linear-loss) statistic for
 * one prediction. The loss term uses the normalized vote assigned to the
 * true class; NaN votes (e.g. from normalizing an all-zero vector) are
 * replaced with a uniform 1/countNaN share.
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    double weight = inst.weight();
    int trueClass = (int) inst.classValue();
    if (weight > 0.0) {
        if (this.weightObserved == 0) {
            // First weighted instance: size the statistics arrays.
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += weight;

        //MSE Calculus
        int predictedClass = Utils.maxIndex(classVotes);
        if (predictedClass == trueClass) {
            this.weightCorrect += weight;
        }

        double[] normalized = normalize(classVotes);

        // Probability mass the model assigned to the true class
        // (0 when the vote vector is empty or too short).
        double vote = 0;
        if (normalized.length > 0) {
            vote = trueClass < normalized.length ? normalized[trueClass] : 0;
        }

        // FIX(idiom): use Double.isNaN instead of the obscure-but-equivalent
        // Double.compare(vote, Double.NaN) == 0.
        if (Double.isNaN(vote)) {
            int countNaN = 0;
            for (int i = 0; i < classVotes.length; ++i) {
                if (Double.isNaN(normalized[i])) {
                    countNaN++;
                }
            }
            // Spread the unit mass uniformly over the NaN entries.
            vote = 1;
            if (countNaN > 1 && classVotes.length > 1) {
                vote = 1.0 / countNaN;
            }

        }
        // Linear loss: 1 minus the probability of the true class.
        this.mse += 1 - vote;

        this.saw++;

        this.rowKappa[predictedClass] += weight;
        this.columnKappa[trueClass] += weight;
    }
}

From source file:moa.evaluation.ClassificationWithNovelClassPerformanceEvaluator.java

License:Open Source License

/**
 * Accumulates novelty-aware classification statistics for one prediction.
 *
 * Note that for novel class testing, an additional class value is added to
 * the known classes. This extra "label" represents a prediction of "Novel
 * Class". This approach allows algorithms that do not have novel class
 * prediction capabilities to still function, as this method first bounds
 * checks to see if the prediction array includes the added label.
 *
 * @param inst instance under test
 * @param classVotes prediction table for this instance
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    if (header == null) {
        // Lazy one-time setup: augment the header with the synthetic
        // "novel" and "outlier" labels and size all counters from it.
        header = AbstractNovelClassClassifier.augmentInstances(inst.dataset());
        this.novelClassLabel = header.classAttribute()
                .indexOfValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
        this.outlierLabel = header.classAttribute()
                .indexOfValue(AbstractNovelClassClassifier.OUTLIER_LABEL_STR);
        this.rowKappa = new double[header.numClasses()];
        Arrays.fill(this.rowKappa, 0.0);
        this.columnKappa = new double[header.numClasses()];
        Arrays.fill(this.columnKappa, 0.0);
        this.knownTrueLabels = new int[header.numClasses()];
        Arrays.fill(knownTrueLabels, 0);
        this.observedLabels = new int[header.numClasses()];
        Arrays.fill(observedLabels, 0);
    }

    final int trueClass = (int) inst.classValue();
    if (classVotes == null) {
        // No prediction at all: only record the ground truth.
        this.knownTrueLabels[trueClass]++;
        return;
    }
    // Copy of the votes restricted to the real labels: the synthetic
    // novel/outlier slots are zeroed so maxIndex picks an actual class.
    final double[] labelsOnlyVotes = Arrays.copyOf(classVotes, inst.numClasses());
    if (labelsOnlyVotes.length > this.novelClassLabel) {
        labelsOnlyVotes[novelClassLabel] = 0;
    }
    if (labelsOnlyVotes.length > this.outlierLabel) {
        labelsOnlyVotes[outlierLabel] = 0;
    }
    final double totalVoteQty = weka.core.Utils.sum(labelsOnlyVotes);
    final int predictedClass = weka.core.Utils.maxIndex(labelsOnlyVotes); // Don't count the special extended indexes for novel and outlier
    final boolean isMarkedOutlier = (weka.core.Utils.maxIndex(classVotes) == this.outlierLabel);

    if (predictedClass < inst.numClasses() && labelsOnlyVotes[predictedClass] > 0.0) { // Only if there is SOME vote (non-zero)
        this.observedLabels[predictedClass]++; // If we predict it, then it can't be novel!
    }
    //final boolean isTrueNovel = !(this.observedLabels[(int)trueClass] > observationsUntilNotNovelOption.getValue());
    boolean predictedNovel = ((classVotes.length > this.novelClassLabel)
            && (classVotes[this.novelClassLabel] > 0));// this.thresholdOfNoveltyOption.getValue()));

    // "Vote outlier": essentially zero total vote mass over the real labels.
    final boolean isVoteOutlier = (totalVoteQty <= (weka.core.Utils.SMALL * 10.0));
    final boolean correctLabelPrediction = (predictedClass == trueClass);
    // How to treat instances flagged as outliers, per configured strategy.
    switch (this.outlierHandlingStrategyOption.getChosenIndex()) {
    case 0: // use anyway
        // keep on trucking...
        break;
    case 1: // ignore marked
        if (isMarkedOutlier) {
            return;
        }
        break;
    case 2: // ignore no vote
        if (isVoteOutlier) {
            return;
        }
        break;
    case 3: // ignore iff marked AND no vote
        if (isVoteOutlier && isMarkedOutlier) {
            return;
        }
        break;
    case 4: // ignore pure OR marked
        if (isVoteOutlier || isMarkedOutlier) {
            return;
        }
        break;
    case 5: // mark as novel
        predictedNovel = predictedNovel || isMarkedOutlier;
        break;
    default:
        break;
    }
    this.numberOfInstancesSeen++;
    this.weightObserved += inst.weight(); // /!\ IS THIS RIGHT???
    //final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption.getValue()) && (this.observedLabels[trueClass] < observationsUntilNotNovelOption.getValue());
    // A class is "truly novel" while it has been seen fewer times than the
    // configured threshold — NOTE(review): heuristic proxy for ground truth.
    final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption
            .getValue());
    // 8x different mutually exclusive options (i.e. 3-bits):
    // (predictedNovel, isTrueNovel, correctLabelPrediction)
    if ((!predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Should be most common
        this.novelClassDetectionTrueNegative++;
        this.weightCorrect++;
    }
    if ((predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Rare if ever
        this.novelClassDetectionTruePositive++;
        this.weightCorrect++;
        assert false : "Paradox 1 - true novel, but predicted the right label";
    }
    if ((predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Error due to overly restrictive models
        this.novelClassDetectionFalsePositive++;
        if (this.goodIsGoodOption.isSet()) {
            this.weightCorrect++;
        }
    }
    if ((!predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Should never happen?  Framework was wrong here, so TN
        this.novelClassDetectionTrueNegative++;
        this.weightCorrect++;
        assert false : "Paradox 2 - true novel, but predicted the right label";
    }
    if ((predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // Should be most common when x is novel
        this.novelClassDetectionTruePositive++;
        this.weightCorrect++;
    }
    if ((predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Probably an Outlier case
        this.novelClassDetectionFalsePositive++;
        if (this.outlierHandlingStrategyOption.getChosenIndex() > 0) {
            this.weightCorrect++;
        }
    }
    if ((!predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // NCD failure     FN
        this.novelClassDetectionFalseNegative++;
    }
    if ((!predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Correct NCD, but bad h(x) prediction
        this.novelClassDetectionTrueNegative++;
    }

    this.rowKappa[predictedClass]++;
    this.columnKappa[trueClass]++;
    this.knownTrueLabels[trueClass] += inst.weight();

}