Example usage for weka.core Instance dataset

List of usage examples for weka.core Instance dataset

Introduction

On this page you can find example usages of weka.core Instance dataset().

Prototype

public Instances dataset();

Source Link

Document

Returns the dataset this instance has access to.

Usage

From source file:moa.classifiers.rules.AbstractAMRules.java

License:Apache License

/**
 * Prints diagnostic information (dataset name and rule-ordering mode)
 * to standard output when the verbosity level is at least 5.
 *
 * @param inst the instance whose dataset's relation name is reported
 */
protected void VerboseToConsole(Instance inst) {
    if (VerbosityOption.getValue() < 5) {
        return; // below the verbosity threshold: print nothing
    }
    System.out.println();
    System.out.println("I) Dataset: " + inst.dataset().relationName());
    if (this.unorderedRulesOption.isSet()) {
        System.out.println("I) Method Unordered");
    } else {
        System.out.println("I) Method Ordered");
    }
}

From source file:moa.classifiers.trees.ePTTD.java

License:Creative Commons License

@Override
public void trainOnInstanceImpl(Instance inst) {
    // Track the total training weight seen by the model.
    final double w = inst.weight();
    if (w > 0.0) {
        this.trainingWeightSeenByModel += w;
    }

    if (isClassificationEnabled) {
        // Warm-up finished: train the internal classifier incrementally.
        ePTTDintern.updateClassifier(inst);
        return;
    }

    // Still collecting the initial warm-up window.
    if (instancesBuffer == null) {
        // Copy only the header (0 capacity hint); instances are added below.
        this.instancesBuffer = new Instances(inst.dataset(), 0);
    }
    instancesBuffer.add(inst);

    if (instancesBuffer.size() == widthInitOption.getValue()) {
        // Window full: validate options and build the classifier once.
        checkOptionsIntegity();
        this.ePTTDintern.buildClassifier(instancesBuffer);
        isClassificationEnabled = true;
    }
}

From source file:moa.classifiers.WEKAClassifier.java

License:Open Source License

/**
 * Trains the wrapped WEKA classifier on one instance. Updateable
 * classifiers are trained incrementally; batch classifiers are (re)built
 * from a buffered window whose size/frequency is controlled by
 * widthInitOption, widthOption and sampleFrequencyOption.
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    try {
        if (numberInstances == 0) {
            // First instance ever: create the buffer from the dataset header.
            this.instancesBuffer = new Instances(inst.dataset());
            if (classifier instanceof UpdateableClassifier) {
                // NOTE(review): built on an (empty) buffer — presumably the
                // updateable classifier only needs the header here; confirm.
                classifier.buildClassifier(instancesBuffer);
                this.isClassificationEnabled = true;
            } else {
                // Batch classifier: start buffering instances for a later build.
                this.isBufferStoring = true;
            }
        }
        numberInstances++;

        if (classifier instanceof UpdateableClassifier) {
            if (numberInstances > 0) {
                ((UpdateableClassifier) classifier).updateClassifier(inst);
            }
        } else {
            if (numberInstances == widthInitOption.getValue()) {
                //Build first time Classifier
                buildClassifier();
                isClassificationEnabled = true;
                //Continue to store instances
                if (sampleFrequencyOption.getValue() != 0) {
                    isBufferStoring = true;
                }
            }
            if (widthOption.getValue() == 0) {
                //Used from SingleClassifierDrift
                if (isBufferStoring == true) {
                    instancesBuffer.add(inst);
                }
            } else {
                //Used from WekaClassifier without using SingleClassifierDrift
                // Position within the current sampling period (whole count if
                // the sampling frequency is disabled, i.e. 0).
                int numInstances = numberInstances % sampleFrequencyOption.getValue();
                if (sampleFrequencyOption.getValue() == 0) {
                    numInstances = numberInstances;
                }
                if (numInstances == 0) {
                    //Begin to store instances
                    isBufferStoring = true;
                }
                if (isBufferStoring == true && numInstances <= widthOption.getValue()) {
                    //Store instances
                    instancesBuffer.add(inst);
                }
                if (numInstances == widthOption.getValue()) {
                    //Build Classifier
                    buildClassifier();
                    isClassificationEnabled = true;
                    // Start a fresh buffer (header only) for the next window.
                    this.instancesBuffer = new Instances(inst.dataset());
                }
            }
        }
    } catch (Exception e) {
        // NOTE(review): exception is reported but not rethrown — training
        // failures are silently skipped; confirm this is intended.
        System.err.println("Training: " + e.getMessage());
    }
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Use outlier-criteria (selectable strategy) for determining if a data point
 * is an outlier (unlikely member) of this cluster.
 *
 * @param x Instance for comparison to see if it is an outlier to this cluster
 * @return true if x is an outlier w.r.t. this cluster
 */
public final boolean isOutlier(Instance x) {
    boolean ret;
    double p = this.getInclusionProbability(x);
    switch (this.outlierDefinitionStrategyOption.getChosenIndex()) {
    case 0: //Use Chauvenet's Criteria to determine outlier standing of the data point for this cluster.
        ret = (p < getChauvenetLimit());
        break;
    case 1: // use Perceptron vote for the synthetic "outlier" label
        double[] v = embeddedClassifier.getVotesForInstance(x);
        try {
            weka.core.Utils.normalize(v);
        } catch (Exception ignored) {
            // normalize throws when the votes sum to zero; proceed with raw votes
        }
        int oIdx = x.dataset().classAttribute().indexOfValue(AbstractNovelClassClassifier.OUTLIER_LABEL_STR);
        // FIX: indexOfValue returns -1 when the label is absent, and the vote
        // array can be shorter than oIdx; the original indexed v[oIdx] without
        // a lower-bound check and could throw ArrayIndexOutOfBoundsException.
        boolean hasOutlierSlot = (oIdx >= 0) && (oIdx < v.length);
        double po = hasOutlierSlot ? v[oIdx] : 0;
        if (po <= 0 && hasOutlierSlot) {
            v[oIdx] = 0;
        }
        int h = weka.core.Utils.maxIndex(v);
        double ph = v[h];
        double margin = (po - ph);
        // Outlier when the outlier vote beats the best label vote by a margin.
        ret = (po > ph) && (margin > (2.0 / v.length));
        break;
    case 2: // 2.5 sigma
        ret = (p < weka.core.FastStats.normalProbability(2.5));
        break;
    case 3: // 3 sigma
        ret = (p < weka.core.FastStats.normalProbability(3));
        break;
    case 4: // 6 sigma
        ret = (p < weka.core.FastStats.normalProbability(6));
        break;
    case 5: // cheat
        ret = p > 0.5;
        break;
    default: // fall back to the 2.5-sigma criterion
        ret = p < weka.core.FastStats.normalProbability(2.5);
    }
    return ret;
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Sanity check and initialization of dynamic fields.
 * Lazily allocates the centroid, per-attribute frequency/variance/entropy
 * arrays, label counters and the internal header the first time an
 * instance is seen.
 *
 * @param x the instance whose header and shape drive the initialization
 */
protected final void safeInit(Instance x) {
    if (this.embeddedLearnerOption.getValueAsCLIString().contains("Majority class")) {
        // Majority-class learners produce no meaningful outlier votes.
        this.excludeOutlierVoting = true;
    }
    if (centroid == null) {
        // Seed the centroid with the first instance's values.
        centroid = x.toDoubleArray();
    }
    if (this.instances == null) {
        prepareEmbeddedClassifier();
        ArrayList<Attribute> attribs = new ArrayList<>();
        // One frequency table per attribute; null for non-symbolic attributes.
        this.symbolFrequencies = new double[x.dataset().numAttributes()][];
        for (int i = 0; i < x.dataset().numAttributes(); ++i) {
            Attribute a = (Attribute) x.dataset().attribute(i).copy();
            // The class attribute is excluded from distance computations
            // via a zero weight; all others count fully.
            if (i == x.classIndex()) {
                a.setWeight(0.0);
            } else {
                a.setWeight(1.0);
            }
            switch (a.type()) {
            case Attribute.STRING:
            case Attribute.NOMINAL:
                //UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = new double[a.numValues()];
                break;
            case Attribute.NUMERIC:
            case Attribute.RELATIONAL:
            case Attribute.DATE:
            default:
                // UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i));
                this.symbolFrequencies[i] = null;
            }
            attribs.add(a);
        }
        this.instances = new Instances("ClusterData", attribs, 1);
        this.instances.setClassIndex(x.classIndex());

    }
    //        else {
    //            for (int i = 0; i < x.dataset().numAttributes() && i < this.header.numAttributes(); ++i) {
    //                double val = x.value(i);
    //                Attribute a = this.header.attribute(i);
    //                // expand range as necessary
    //                if (val < a.getLowerNumericBound() || val > a.getUpperNumericBound()){
    //                    UnsafeUtils.setAttributeRange(a, Math.min(val,a.getLowerNumericBound()), Math.max(val,a.getUpperNumericBound()));
    //                }
    //                // increase frequency counts if new string value is encountered
    //                if (a.type() == Attribute.STRING && (val >= Math.max(this.symbolFrequencies[i].length, a.numValues()))) {
    //                    double newArray[] = new double[Math.max(this.symbolFrequencies[i].length, a.numValues())];
    //                    Arrays.fill(newArray, 0);
    //                    for(int j = 0; j <= this.symbolFrequencies[i].length; j++) {
    //                        newArray[j] = this.symbolFrequencies[i][j];
    //                    }
    //                    this.symbolFrequencies[i] = newArray;
    //                }
    //            }
    //        }
    // Per-attribute statistics start at unit variance / zero entropy.
    if (this.variances == null) {
        this.variances = new double[x.numAttributes()];
        Arrays.fill(this.variances, 1);
    }
    if (this.entropies == null) {
        this.entropies = new double[x.numAttributes()];
        Arrays.fill(this.entropies, 0);
    }
    // Predicted-label and ground-truth-label counters, one slot per class.
    if (this.labelFrequencies == null) {
        this.labelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.labelFrequencies, 0);
    }
    if (this.gtLabelFrequencies == null) {
        this.gtLabelFrequencies = new double[x.numClasses()];
        Arrays.fill(this.gtLabelFrequencies, 0);
    }
    if (this.rho == null) {
        this.rho = new double[x.numAttributes()];
        Arrays.fill(this.rho, 0);
    }
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Use inclusion probability to discover the cluster "nearest" the provided instance
 * Uses main object's outlier container/*from  w  w  w.ja  v a  2 s .co m*/
 * @param x instance in question
 * @return sorted set of clusters, ordered by inc
 */
protected final NearestInstanceTuple[] findNearestOutliers(Instance x) {
    NearestInstanceTuple[] ret = new NearestInstanceTuple[potentialNovels.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    for (Instance n : potentialNovels) {
        double distance = VectorDistances.distance(xVals, n.toDoubleArray(), x.dataset(),
                this.distanceStrategyOption.getChosenIndex());
        NearestInstanceTuple nit = new NearestInstanceTuple(n, distance);
        ret[idx++] = nit;
    } // end for
    Arrays.parallelSort(ret);
    return ret;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * In cases where this class is not used by the moa.tasks.EvaluateNonStationaryDynamicStream task, this safety
 * (fallback) initialization procedure is necessary. Lazily creates the
 * universal cluster, the known-label counters and the augmented header.
 *
 * @param x the instance whose header and shape drive the initialization
 */
public final void safeInit(Instance x) {
    if (this.universalCluster == null) {
        // Build and configure the all-encompassing cluster, then publish it.
        final Riffle universe = new Riffle(x);
        universe.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
        final double[] unitVariances = new double[x.numAttributes()];
        Arrays.fill(unitVariances, 1.0);
        universe.setVariances(unitVariances);
        universe.setWeight(0);
        universe.recompute();
        universalCluster = universe;
        bestProbabilitySums = 0;
        bestProbabilityCount = 0;
    }
    if (this.knownLabels == null) {
        // One observation counter per class label.
        this.knownLabels = new int[x.numClasses()];
        Arrays.fill(knownLabels, 0);
        this.numAttributes = x.numAttributes();
    }
    if (this.header == null) {
        this.header = AbstractNovelClassClassifier.augmentInstances(x.dataset());
    }
}

From source file:moa.evaluation.BasicClassificationPerformanceEvaluator.java

License:Open Source License

/**
 * Accumulates accuracy and kappa statistics for one prediction, plus a
 * "no-change" baseline that always repeats the previously seen label.
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    final double w = inst.weight();
    final int actualClass = (int) inst.classValue();
    if (w > 0.0) {
        if (this.weightObserved == 0) {
            // First weighted instance: size the statistics arrays.
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += w;
        final int predicted = Utils.maxIndex(classVotes);
        if (predicted == actualClass) {
            this.weightCorrect += w;
        }
        this.rowKappa[predicted] += w;
        this.columnKappa[actualClass] += w;
    }
    // No-change baseline: credit when repeating the last label would be right.
    if (actualClass == this.lastSeenClass) {
        this.weightCorrectNoChangeClassifier += w;
    }
    this.lastSeenClass = actualClass;
}

From source file:moa.evaluation.BasicClassificationScoringEvaluator.java

License:Open Source License

/**
 * Accumulates accuracy, kappa and a score-based (linear-loss) statistic for
 * one prediction. The loss term uses the normalized vote assigned to the
 * true class; NaN votes (e.g. from normalizing an all-zero vector) are
 * replaced with a uniform 1/countNaN share.
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    double weight = inst.weight();
    int trueClass = (int) inst.classValue();
    if (weight > 0.0) {
        if (this.weightObserved == 0) {
            // First weighted instance: size the statistics arrays.
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += weight;

        //MSE Calculus
        int predictedClass = Utils.maxIndex(classVotes);
        if (predictedClass == trueClass) {
            this.weightCorrect += weight;
        }

        double[] normalized = normalize(classVotes);

        // Probability mass the model assigned to the true class
        // (0 when the vote vector is empty or too short).
        double vote = 0;
        if (normalized.length > 0) {
            vote = trueClass < normalized.length ? normalized[trueClass] : 0;
        }

        // FIX(idiom): use Double.isNaN instead of the obscure-but-equivalent
        // Double.compare(vote, Double.NaN) == 0.
        if (Double.isNaN(vote)) {
            int countNaN = 0;
            for (int i = 0; i < classVotes.length; ++i) {
                if (Double.isNaN(normalized[i])) {
                    countNaN++;
                }
            }
            // Spread the unit mass uniformly over the NaN entries.
            vote = 1;
            if (countNaN > 1 && classVotes.length > 1) {
                vote = 1.0 / countNaN;
            }

        }
        // Linear loss: 1 minus the probability of the true class.
        this.mse += 1 - vote;

        this.saw++;

        this.rowKappa[predictedClass] += weight;
        this.columnKappa[trueClass] += weight;
    }
}

From source file:moa.evaluation.ClassificationWithNovelClassPerformanceEvaluator.java

License:Open Source License

/**
 * Accumulates novelty-aware classification statistics for one prediction.
 *
 * Note that for novel class testing, an additional class value is added to
 * the known classes. This extra "label" represents a prediction of "Novel
 * Class". This approach allows algorithms that do not have novel class
 * prediction capabilities to still function, as this method first bounds
 * checks to see if the prediction array includes the added label.
 *
 * @param inst instance under test
 * @param classVotes prediction table for this instance
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    if (header == null) {
        // Lazy one-time setup: augment the header with the synthetic
        // "novel" and "outlier" labels and size all counters from it.
        header = AbstractNovelClassClassifier.augmentInstances(inst.dataset());
        this.novelClassLabel = header.classAttribute()
                .indexOfValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
        this.outlierLabel = header.classAttribute()
                .indexOfValue(AbstractNovelClassClassifier.OUTLIER_LABEL_STR);
        this.rowKappa = new double[header.numClasses()];
        Arrays.fill(this.rowKappa, 0.0);
        this.columnKappa = new double[header.numClasses()];
        Arrays.fill(this.columnKappa, 0.0);
        this.knownTrueLabels = new int[header.numClasses()];
        Arrays.fill(knownTrueLabels, 0);
        this.observedLabels = new int[header.numClasses()];
        Arrays.fill(observedLabels, 0);
    }

    final int trueClass = (int) inst.classValue();
    if (classVotes == null) {
        // No prediction at all: only record the ground truth.
        this.knownTrueLabels[trueClass]++;
        return;
    }
    // Copy of the votes restricted to the real labels: the synthetic
    // novel/outlier slots are zeroed so maxIndex picks an actual class.
    final double[] labelsOnlyVotes = Arrays.copyOf(classVotes, inst.numClasses());
    if (labelsOnlyVotes.length > this.novelClassLabel) {
        labelsOnlyVotes[novelClassLabel] = 0;
    }
    if (labelsOnlyVotes.length > this.outlierLabel) {
        labelsOnlyVotes[outlierLabel] = 0;
    }
    final double totalVoteQty = weka.core.Utils.sum(labelsOnlyVotes);
    final int predictedClass = weka.core.Utils.maxIndex(labelsOnlyVotes); // Don't count the special extended indexes for novel and outlier
    final boolean isMarkedOutlier = (weka.core.Utils.maxIndex(classVotes) == this.outlierLabel);

    if (predictedClass < inst.numClasses() && labelsOnlyVotes[predictedClass] > 0.0) { // Only if there is SOME vote (non-zero)
        this.observedLabels[predictedClass]++; // If we predict it, then it can't be novel!
    }
    //final boolean isTrueNovel = !(this.observedLabels[(int)trueClass] > observationsUntilNotNovelOption.getValue());
    boolean predictedNovel = ((classVotes.length > this.novelClassLabel)
            && (classVotes[this.novelClassLabel] > 0));// this.thresholdOfNoveltyOption.getValue()));

    // "Vote outlier": essentially zero total vote mass over the real labels.
    final boolean isVoteOutlier = (totalVoteQty <= (weka.core.Utils.SMALL * 10.0));
    final boolean correctLabelPrediction = (predictedClass == trueClass);
    // How to treat instances flagged as outliers, per configured strategy.
    switch (this.outlierHandlingStrategyOption.getChosenIndex()) {
    case 0: // use anyway
        // keep on trucking...
        break;
    case 1: // ignore marked
        if (isMarkedOutlier) {
            return;
        }
        break;
    case 2: // ignore no vote
        if (isVoteOutlier) {
            return;
        }
        break;
    case 3: // ignore iff marked AND no vote
        if (isVoteOutlier && isMarkedOutlier) {
            return;
        }
        break;
    case 4: // ignore pure OR marked
        if (isVoteOutlier || isMarkedOutlier) {
            return;
        }
        break;
    case 5: // mark as novel
        predictedNovel = predictedNovel || isMarkedOutlier;
        break;
    default:
        break;
    }
    this.numberOfInstancesSeen++;
    this.weightObserved += inst.weight(); // /!\ IS THIS RIGHT???
    //final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption.getValue()) && (this.observedLabels[trueClass] < observationsUntilNotNovelOption.getValue());
    // A class is "truly novel" while it has been seen fewer times than the
    // configured threshold — NOTE(review): heuristic proxy for ground truth.
    final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption
            .getValue());
    // 8x different mutually exclusive options (i.e. 3-bits):
    // (predictedNovel, isTrueNovel, correctLabelPrediction)
    if ((!predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Should be most common
        this.novelClassDetectionTrueNegative++;
        this.weightCorrect++;
    }
    if ((predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Rare if ever
        this.novelClassDetectionTruePositive++;
        this.weightCorrect++;
        assert false : "Paradox 1 - true novel, but predicted the right label";
    }
    if ((predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Error due to overly restrictive models
        this.novelClassDetectionFalsePositive++;
        if (this.goodIsGoodOption.isSet()) {
            this.weightCorrect++;
        }
    }
    if ((!predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Should never happen?  Framework was wrong here, so TN
        this.novelClassDetectionTrueNegative++;
        this.weightCorrect++;
        assert false : "Paradox 2 - true novel, but predicted the right label";
    }
    if ((predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // Should be most common when x is novel
        this.novelClassDetectionTruePositive++;
        this.weightCorrect++;
    }
    if ((predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Probably an Outlier case
        this.novelClassDetectionFalsePositive++;
        if (this.outlierHandlingStrategyOption.getChosenIndex() > 0) {
            this.weightCorrect++;
        }
    }
    if ((!predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // NCD failure     FN
        this.novelClassDetectionFalseNegative++;
    }
    if ((!predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Correct NCD, but bad h(x) prediction
        this.novelClassDetectionTrueNegative++;
    }

    this.rowKappa[predictedClass]++;
    this.columnKappa[trueClass]++;
    this.knownTrueLabels[trueClass] += inst.weight();

}