List of usage examples for weka.core.Instance.setWeight
public void setWeight(double weight);
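Before the mined examples below, a minimal self-contained sketch of the call (hypothetical dataset and attribute names, assuming the Weka 3.7+ API):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetWeightDemo {
    public static void main(String[] args) {
        // Build a tiny single-attribute dataset.
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        Instances data = new Instances("demo", attrs, 1);

        // The DenseInstance constructor's first argument is the initial weight.
        Instance inst = new DenseInstance(1.0, new double[] { 42.0 });
        inst.setDataset(data);
        data.add(inst);

        // Re-weight the instance, e.g. to emphasize it during training.
        // Note: Instances.add() stores a copy, so set the weight on the
        // stored reference, not on the local variable.
        data.instance(0).setWeight(2.5);
        System.out.println(data.instance(0).weight()); // prints 2.5
    }
}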
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * @return training accuracy
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 10;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);
    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Riffle thatCluster : this.clusters) {
            double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
            for (Instance x : thatCluster.getHeader()) {
                Instance pseudoPt = makePerceptronInstance(thisCluster, x);
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    this.outlierPerceptronTrainingSet.parallelStream().forEach((x) -> {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    });
    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) {
                    // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs
        // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // Adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);
        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons
    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}
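Note: the boosting loop above is the classic AdaBoost re-weighting scheme expressed through setWeight(). With epsilon the summed weight of misclassified instances, each correctly classified instance has its weight multiplied by epsilon / (1 - epsilon), NaN results are floored at weka.core.Utils.SMALL, and all weights are then renormalized to sum to one; the learner's ensemble weight is log((1 - epsilon) / epsilon).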
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * @param c cluster that is being compared against
 * @param x real data instance
 * @return DenseInstance made to work with the outlier-detecting perceptron
 */
private Instance makePerceptronInstance(Riffle c, Instance x) {
    Instance pseudoPoint = new DenseInstance(this.outlierPerceptronTrainingSet.numAttributes());
    pseudoPoint.setDataset(outlierPerceptronTrainingSet);
    double p = c.getInclusionProbability(x);
    double r = (c.getRadius() != 0) ? c.getRadius() : 1;
    //double w = c.getWeight();
    double N = Math.min(c.size(), this.cacheSizeOption.getValue());
    double d = c.getCenterDistance(x);
    double logP = (p == 0) ? 0 : Math.log(p);
    double logDR = (r == 0 || (d / r) == 0) ? 0 : Math.log(d / r);
    pseudoPoint.setValue(0, logP);
    pseudoPoint.setValue(1, logDR);
    pseudoPoint.setValue(2, logDR * logP);
    pseudoPoint.setValue(3,
            logP - Math.log(1.0 / Math.pow(2.0 * N, this.universalCluster.getHeader().numAttributes())));
    pseudoPoint.setClassValue(0);
    pseudoPoint.setWeight(0.0);
    return pseudoPoint;
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * @return training accuracy
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 1;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);
    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Instance x : thisCluster.getHeader()) {
            Instance pseudoPt = makePerceptronInstance(thisCluster, x);
            for (Riffle thatCluster : this.clusters) {
                double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    for (Instance x : this.outlierPerceptronTrainingSet) {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    }
    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) {
                    // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs
        // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // Adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);
        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons
    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * @return instances retrieved from stream
 */
private Instances getChunk() {
    Instances chunk = new Instances(stream.getHeader(), this.chunkSizeOption.getValue());
    // Add "chunk size" number of instances to test directly from the stream (first time we see each instance):
    while (stream.hasMoreInstances() && chunk.numInstances() < this.chunkSizeOption.getValue()) {
        Instance inst = stream.nextInstance();
        this.instancesProcessed++;
        chunk.add(inst);
        if (this.inWarmupPhase) {
            // For warmup phase, use full and immediate training
            inst.setWeight(1.0);
            latentTrainingInstQueue.addFirst(new TimeBoxedInstance(inst, this.instancesProcessed, 0, null));
        } else if (rng.nextFloat() > this.trainingFractionOption.getValue()) {
            // Select a portion for the latent training set by setting non-training instance weight to zero.
            // Place at the beginning of the queue/list and record the intended activation 'time' for
            // immediate unsupervised 'training'.
            inst.setWeight(0.0);
            latentTrainingInstQueue.addFirst(new TimeBoxedInstance(inst, this.instancesProcessed, 0, null));
        } else {
            if (this.sendZeroWeightsOption.isSet()) {
                Instance unsupervisedInstance = (Instance) inst.copy();
                unsupervisedInstance.setWeight(0.0);
                //unsupervisedInstance.setClassValue(0);
                latentTrainingInstQueue.addFirst(
                        new TimeBoxedInstance(unsupervisedInstance, this.instancesProcessed, 0, null));
            }
            // Place at the end of the queue/list and record the intended activation 'time' for latent
            // supervised training.
            latentTrainingInstQueue.addLast(new TimeBoxedInstance(inst, this.instancesProcessed,
                    this.trainingTimeDelayOption.getValue(), null));
        }
        // MOA framework housekeeping and reporting...
        if ((instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES) == 0) {
            this.monitor.setCurrentActivityDescription("Updating Metrics");
            if (monitor.taskShouldAbort()) {
                chunk.clear();
                return chunk;
            }
            long estimatedRemainingInstances = stream.estimatedRemainingInstances();
            if (this.instanceLimitOption.getValue() > 0) {
                long maxRemaining = this.instanceLimitOption.getValue() - instancesProcessed;
                if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                    estimatedRemainingInstances = maxRemaining;
                }
            }
            monitor.setCurrentActivityFractionComplete(
                    (double) instancesProcessed / (double) (instancesProcessed + estimatedRemainingInstances));
        }
    } // end while
    return chunk;
}
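Note: in this task the weight doubles as a supervision flag rather than an importance factor — setWeight(1.0) marks warmup instances for full, immediate training, while setWeight(0.0) marks instances (or copies of them) to be passed downstream as unlabeled data.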
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * @param testInstances instance set to evaluate accuracy
 * @return number of instances actually tested
 */
private int test(Instances testInstances) {
    this.monitor.setCurrentActivityDescription("Testing Instances");
    int ret = testInstances.size();
    int novelClassLabel = testInstances.numClasses();
    int outlierLabel = novelClassLabel + 1;
    // For latent label outliers that have reached their deadline, we must now make a decision:
    while (!this.pendingFinalLabelInstQueue.isEmpty()
            && this.pendingFinalLabelInstQueue.peek().deadline <= this.instancesProcessed) {
        TimeBoxedInstance ti = this.pendingFinalLabelInstQueue.pop();
        int y = (int) ti.inst.classValue();
        double[] prediction = null;
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            Instance novelInst = (Instance) ti.inst.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            novelInst.setWeight(NOVEL_WEIGHT);
            prediction = learner.getVotesForInstance(novelInst);
            evaluator.addResult(novelInst, prediction); // Outlier out of time. Remove it
        } else {
            prediction = learner.getVotesForInstance(ti.inst);
            evaluator.addResult(ti.inst, prediction); // Outlier out of time. Remove it
        }
        this.cm.add(weka.core.Utils.maxIndex(prediction), ti.inst.classValue());
    }
    // Run accuracy test for current instance(s)
    for (Instance i : testInstances) {
        int y = (int) i.classValue();
        double[] prediction = null;
        Instance instToActuallyPredict = i;
        // If novel, make a special instance
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            instToActuallyPredict = (Instance) i.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            // WARNING - this crashes other algorithms if not also done on training!
            instToActuallyPredict.setWeight(NOVEL_WEIGHT);
        }
        prediction = learner.getVotesForInstance(instToActuallyPredict);
        if ((prediction.length > outlierLabel) && (prediction[outlierLabel] > (1.0 / prediction.length))) {
            // Delay accuracy metrics until stale time
            this.pendingFinalLabelInstQueue.add(new TimeBoxedInstance(i, this.instancesProcessed,
                    this.labelDeadlineOption.getValue(), prediction));
        } else {
            // Not an outlier, so treat it like normal
            evaluator.addResult(instToActuallyPredict, prediction);
            this.cm.add(weka.core.Utils.maxIndex(prediction), i.classValue());
        }
    } // end for
    assert this.pendingFinalLabelInstQueue.size() < (this.labelDeadlineOption.getValue() + 1)
            : "Cache 'pendingFinalLabelInstQueue' is larger than designed.";
    return ret;
}
From source file:mulan.transformations.multiclass.CopyWeight.java
License:Open Source License
/**
 * Transforms a multi-label instance to a list of single-label instances,
 * one for each of the labels that annotate the instance, by copying the
 * feature vector and attaching a weight equal to 1/(list size).
 *
 * @param instance a multi-label instance
 * @return a list with the transformed single-label instances
 */
@Override
List<Instance> transformInstance(Instance instance) {
    List<Instance> copy = super.transformInstance(instance);
    for (Instance anInstance : copy) {
        anInstance.setWeight(1.0 / copy.size());
    }
    return copy;
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.WekaMIContourDataClassifier.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void buildClassifier(ContourDataGrid cData, VectorDataList bgData) throws Exception {
    // transform input data to weka mi-instances
    m_data = initDataset(cData.numFeatures(), 2, cData.totalLength() + bgData.numVectors(), cData.width());
    for (int r = 0; r < cData.totalLength(); r++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        for (int c = 0; c < cData.width(); c++) {
            int vecIdx = cData.getVectorIdx(c, r);
            Instance inst = new DenseInstance(cData.weight(vecIdx), cData.getVector(vecIdx));
            inst.setDataset(bagData);
            bagData.add(inst);
        }
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, r); // bag id
        newBag.setValue(2, 1); // class attribute
        newBag.setValue(1, value);
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }
    for (int i = 0; i < bgData.numVectors(); i++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        Instance inst = new DenseInstance(bgData.weight(i), bgData.getVector(i));
        inst.setDataset(bagData);
        bagData.add(inst);
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, cData.totalLength() + i);
        newBag.setValue(2, 0);
        newBag.setValue(1, value);
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }
    m_classifier.buildClassifier(m_data);
}
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Splits instances into subsets based on the given split.
 *
 * @param data the data to work with
 * @return the subsets of instances
 * @throws Exception if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {
    // Allocate array of Instances objects
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }
    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            double[] testPoint = new double[2];
            int ctr = 0;
            for (int k = 0; k < data.numInstances(); k++) {
                ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(data.instance(k));
            }
        } else {
            Classifier fc;
            double predictedClass;
            // Go through the data
            for (int i = 0; i < data.numInstances(); i++) {
                // Get instance
                Instance inst = data.instance(i);
                String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
                fc = listOfFc.get(classifierId);
                predictedClass = fc.classifyInstance(inst);
                if (predictedClass != Instance.missingValue()) {
                    subsets[(int) predictedClass].add(inst);
                    continue;
                }
                // Else throw an exception
                throw new IllegalArgumentException("Unknown attribute type");
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {
            // Get instance
            Instance inst = data.instance(i);
            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {
                // Split instance up
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }
                // Proceed to next instance
                continue;
            }
            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);
                // Proceed to next instance
                continue;
            }
            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);
                // Proceed to next instance
                continue;
            }
            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }
    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }
    // Return the subsets
    return subsets;
}
From source file:org.wikipedia.miner.annotation.Disambiguator.java
License:Open Source License
@SuppressWarnings("unchecked") private void weightTrainingInstances() { double positiveInstances = 0; double negativeInstances = 0; Enumeration<Instance> e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = (Instance) e.nextElement(); double isValidSense = i.value(3); if (isValidSense == 0) positiveInstances++;/*from w w w. j a v a 2 s . co m*/ else negativeInstances++; } double p = (double) positiveInstances / (positiveInstances + negativeInstances); e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = (Instance) e.nextElement(); double isValidSense = i.value(3); if (isValidSense == 0) i.setWeight(0.5 * (1.0 / p)); else i.setWeight(0.5 * (1.0 / (1 - p))); } }
From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java
License:Open Source License
@SuppressWarnings("unchecked") private void weightTrainingInstances() { double positiveInstances = 0; double negativeInstances = 0; Enumeration<Instance> e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = e.nextElement(); double isValidSense = i.value(attributes.size() - 1); if (isValidSense == 0) positiveInstances++;/*from www .j a v a 2 s .c o m*/ else negativeInstances++; } double p = (double) positiveInstances / (positiveInstances + negativeInstances); System.out.println("stats: positive=" + p + ", negative=" + (1 - p)); e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = e.nextElement(); double isLinked = i.value(attributes.size() - 1); if (isLinked == 0) i.setWeight(0.5 * (1.0 / p)); else i.setWeight(0.5 * (1.0 / (1 - p))); } }