Example usage for weka.core Instance setWeight

List of usage examples for weka.core Instance setWeight

Introduction

In this page you can find the example usage for weka.core Instance setWeight.

Prototype

public void setWeight(double weight);

Source Link

Document

Sets the weight of an instance.

Usage

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Trains the boosted perceptron ensemble (this.perceptrons / this.pweights)
 * from the warmup-phase clustering, using an AdaBoost-style reweighting loop
 * over pseudo-instances generated from every (cluster, point) pair.
 *
 * @return training accuracy in [0, 1] over the generated pseudo-training set
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering 
    final int epochs = 20;
    final int numberOfPerceptrons = 10;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);

    // Generate training set: one pseudo-instance per (cluster, point) pair,
    // labeled MEMBER when the point belongs to the compared cluster, else OUTLIER.
    for (Riffle thisCluster : this.clusters) {
        for (Riffle thatCluster : this.clusters) {
            double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
            for (Instance x : thatCluster.getHeader()) {
                Instance pseudoPt = makePerceptronInstance(thisCluster, x);
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    // Start from uniform instance weights.
    this.outlierPerceptronTrainingSet.parallelStream().forEach((x) -> {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    });

    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) { // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } //end epochs
        // Evaluate weak learner: weighted error over the whole training set.
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // BUGFIX: clamp the weighted error away from 0 and 1 so the AdaBoost
        // update err/(1 - err) and the ensemble weight log((1 - err)/err)
        // stay finite. A perfect learner (err == 0) previously zeroed every
        // weight (making weightSum 0 and the normalization divide by zero),
        // and err == 1 produced a division by zero directly.
        double boundedError = Math.min(Math.max(errorFunctionSum, weka.core.Utils.SMALL),
                1.0 - weka.core.Utils.SMALL);
        // adjust training weights: down-weight correctly classified instances
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= boundedError / (1.0 - boundedError);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize weights to sum to 1 (guard against a zero total).
        if (weightSum > 0) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                x.setWeight(x.weight() / weightSum);
            }
        }
        // Add to ensemble: standard AdaBoost learner weight.
        double newPerceptronWeight = Math.log((1 - boundedError) / boundedError);

        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons

    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Builds a feature pseudo-instance describing how well a real data point
 * fits a cluster, for use by the outlier-detecting perceptron.
 *
 * @param c cluster that is being compared against
 * @param x real data instance
 * @return DenseInstance made to work with the outlier-detecting perceptron
 */
private Instance makePerceptronInstance(Riffle c, Instance x) {
    Instance features = new DenseInstance(this.outlierPerceptronTrainingSet.numAttributes());
    features.setDataset(outlierPerceptronTrainingSet);
    final double inclusionProb = c.getInclusionProbability(x);
    final double radius = (c.getRadius() != 0) ? c.getRadius() : 1;
    final double effectiveSize = Math.min(c.size(), this.cacheSizeOption.getValue());
    final double centerDist = c.getCenterDistance(x);
    // Use 0 instead of -Infinity when the probability or distance ratio is zero.
    final double logP = (inclusionProb == 0) ? 0 : Math.log(inclusionProb);
    final double logDR = (radius == 0 || (centerDist / radius) == 0) ? 0 : Math.log(centerDist / radius);
    features.setValue(0, logP);
    features.setValue(1, logDR);
    features.setValue(2, logDR * logP);
    features.setValue(3, logP
            - Math.log(1.0 / Math.pow(2.0 * effectiveSize, this.universalCluster.getHeader().numAttributes())));
    features.setClassValue(0);
    features.setWeight(0.0);
    return features;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Trains the boosted perceptron ensemble (this.perceptrons / this.pweights)
 * from the warmup-phase clustering, using an AdaBoost-style reweighting loop
 * over pseudo-instances generated from every (cluster, point) pair.
 *
 * @return training accuracy in [0, 1] over the generated pseudo-training set
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering 
    final int epochs = 20;
    final int numberOfPerceptrons = 1;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);

    // Generate training set: one FRESH pseudo-instance per (point, cluster)
    // pair. BUGFIX: the original built a single pseudoPt per point, then
    // mutated its class value and re-added the SAME object once per cluster;
    // unless Instances.add copies its argument, every entry aliases the last
    // label written. Creating the instance inside the inner loop (as the
    // FeS2 variant of this method does) removes the aliasing.
    for (Riffle thisCluster : this.clusters) {
        for (Instance x : thisCluster.getHeader()) {
            for (Riffle thatCluster : this.clusters) {
                double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
                Instance pseudoPt = makePerceptronInstance(thisCluster, x);
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    // Start from uniform instance weights.
    for (Instance x : this.outlierPerceptronTrainingSet) {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    }

    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) { // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } //end epochs
        // Evaluate weak learner: weighted error over the whole training set.
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // BUGFIX: clamp the weighted error away from 0 and 1 so the AdaBoost
        // update err/(1 - err) and the ensemble weight log((1 - err)/err)
        // stay finite. A perfect learner (err == 0) previously zeroed every
        // weight (making weightSum 0 and the normalization divide by zero),
        // and err == 1 produced a division by zero directly.
        double boundedError = Math.min(Math.max(errorFunctionSum, weka.core.Utils.SMALL),
                1.0 - weka.core.Utils.SMALL);
        // adjust training weights: down-weight correctly classified instances
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= boundedError / (1.0 - boundedError);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize weights to sum to 1 (guard against a zero total).
        if (weightSum > 0) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                x.setWeight(x.weight() / weightSum);
            }
        }
        // Add to ensemble: standard AdaBoost learner weight.
        double newPerceptronWeight = Math.log((1 - boundedError) / boundedError);

        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons

    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}

From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java

License:Open Source License

/**
 * Pulls up to chunkSize instances from the stream for testing, and routes
 * each instance into the latent-training queue. The instance weight encodes
 * training eligibility: 1.0 = full immediate training (warmup phase),
 * 0.0 = unsupervised use only.
 *
 * @return instances retrieved from stream
 */
private Instances getChunk() {
    Instances chunk = new Instances(stream.getHeader(), this.chunkSizeOption.getValue());
    // Add "chunk size" number of instances to test directly from the stream (first time we see each instance):
    while (stream.hasMoreInstances() && chunk.numInstances() < this.chunkSizeOption.getValue()) {
        Instance inst = stream.nextInstance();
        this.instancesProcessed++;
        chunk.add(inst);

        if (this.inWarmupPhase) { // For warmup phase, use full and immediate training
            inst.setWeight(1.0);
            latentTrainingInstQueue.addFirst(new TimeBoxedInstance(inst, this.instancesProcessed, 0, null));
        } else if (rng.nextFloat() > this.trainingFractionOption.getValue()) { // Select a portion for latent training set by setting non-training instance weight to zero.
            // NOTE(review): with '>' roughly (1 - trainingFraction) of instances
            // take this zero-weight branch — confirm that is the intended split.
            // place at beginning of the queue/list and record intended activation 'time' for immediate unsupervised 'training'
            inst.setWeight(0.0);
            latentTrainingInstQueue.addFirst(new TimeBoxedInstance(inst, this.instancesProcessed, 0, null));
        } else {
            if (this.sendZeroWeightsOption.isSet()) {
                // Also emit a zero-weight copy for immediate unsupervised use,
                // ahead of the delayed supervised copy queued below.
                Instance unsupervisedInstance = (Instance) inst.copy();
                unsupervisedInstance.setWeight(0.0);
                //unsupervisedInstance.setClassValue(0);
                latentTrainingInstQueue.addFirst(
                        new TimeBoxedInstance(unsupervisedInstance, this.instancesProcessed, 0, null));
            }
            // place at end of the queue/list and record intended activation 'time' for latent supervised training
            latentTrainingInstQueue.addLast(new TimeBoxedInstance(inst, this.instancesProcessed,
                    this.trainingTimeDelayOption.getValue(), null));
        }

        // MOA framework housekeeping and reporting...
        if ((instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES) == 0) {
            this.monitor.setCurrentActivityDescription("Updating Metrics");
            if (monitor.taskShouldAbort()) {
                // Task aborted: return an emptied chunk immediately.
                chunk.clear();
                return chunk;
            }
            long estimatedRemainingInstances = stream.estimatedRemainingInstances();

            // A negative estimate means "unknown"; cap the estimate by the
            // configured instance limit when one is set.
            if (this.instanceLimitOption.getValue() > 0) {
                long maxRemaining = this.instanceLimitOption.getValue() - instancesProcessed;
                if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                    estimatedRemainingInstances = maxRemaining;
                }
            }
            monitor.setCurrentActivityFractionComplete(
                    (double) instancesProcessed / (double) (instancesProcessed + estimatedRemainingInstances));
        }
    } // end while
    return chunk;
}

From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java

License:Open Source License

/**
 * Evaluates the learner on a set of test instances, handling suspected
 * outliers via a deferred-decision queue: instances the learner flags as
 * outliers are parked in pendingFinalLabelInstQueue until a label deadline
 * passes, at which point they are scored as-is (or as "novel" when their
 * true label is still effectively unknown).
 *
 * @param testInstances instance set to evaluate accuracy
 * @return number of instances actually tested
 */
private int test(Instances testInstances) {
    this.monitor.setCurrentActivityDescription("Testing Instances");
    int ret = testInstances.size();
    // Synthetic label indices appended after the real classes:
    // novelClassLabel marks a never-before-seen class, outlierLabel an outlier vote.
    int novelClassLabel = testInstances.numClasses();
    int outlierLabel = novelClassLabel + 1;

    // For latent label outliers that have reached their deadline, we must now make a decision:
    while (!this.pendingFinalLabelInstQueue.isEmpty()
            && this.pendingFinalLabelInstQueue.peek().deadline <= this.instancesProcessed) {
        TimeBoxedInstance ti = this.pendingFinalLabelInstQueue.pop();
        int y = (int) ti.inst.classValue();
        double[] prediction = null;
        // Label still scarcely seen (within labelDeadline) => score as a novel-class case.
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            Instance novelInst = (Instance) ti.inst.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            novelInst.setWeight(NOVEL_WEIGHT);
            prediction = learner.getVotesForInstance(novelInst);
            evaluator.addResult(novelInst, prediction); // Outlier out of time. Remove it
        } else {
            prediction = learner.getVotesForInstance(ti.inst);
            evaluator.addResult(ti.inst, prediction); // Outlier out of time. Remove it
        }

        // Record the decision in the confusion matrix.
        this.cm.add(weka.core.Utils.maxIndex(prediction), ti.inst.classValue());
    }

    // Run accuracy test for current instance(s)
    for (Instance i : testInstances) {
        int y = (int) i.classValue();
        double[] prediction = null;
        Instance instToActuallyPredict = i;
        // If novel, make a special instance
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            instToActuallyPredict = (Instance) i.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR); // WARNING - this crashes other algorithms if not also done on training!
            instToActuallyPredict.setWeight(NOVEL_WEIGHT);
        }
        prediction = learner.getVotesForInstance(instToActuallyPredict);
        // Outlier vote above uniform chance => defer the accuracy decision.
        if ((prediction.length > outlierLabel) && (prediction[outlierLabel] > (1.0 / prediction.length))) {
            this.pendingFinalLabelInstQueue.add(new TimeBoxedInstance(i, this.instancesProcessed,
                    this.labelDeadlineOption.getValue(), prediction)); // Delay accuracy metrics until stale time
        } else {
            evaluator.addResult(instToActuallyPredict, prediction); // Not an outlier, so treat it like normal
            this.cm.add(weka.core.Utils.maxIndex(prediction), i.classValue());
        }
    } // end for

    assert this.pendingFinalLabelInstQueue.size() < (this.labelDeadlineOption.getValue()
            + 1) : "Cache 'pendingFinalLabelInstQueue' is larger than designed.";
    return ret;
}

From source file:mulan.transformations.multiclass.CopyWeight.java

License:Open Source License

/**
 * Transforms a multi-label instance into one single-label instance per label
 * annotating it, assigning each copy a weight of 1/(number of copies) so the
 * copies jointly carry the weight of one instance.
 *
 * @param instance a multi-label instance
 * @return a list with the transformed single-label instances
 */
@Override
List<Instance> transformInstance(Instance instance) {
    final List<Instance> transformed = super.transformInstance(instance);
    final double sharedWeight = 1.0 / transformed.size();
    for (Instance single : transformed) {
        single.setWeight(sharedWeight);
    }
    return transformed;
}

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.WekaMIContourDataClassifier.java

License:Open Source License

/**
 * {@inheritDoc}/* w  w  w  . j  a  va 2 s  .  co m*/
 */
@Override
public void buildClassifier(ContourDataGrid cData, VectorDataList bgData) throws Exception {

    // transform input data to weka mi-instances
    m_data = initDataset(cData.numFeatures(), 2, cData.totalLength() + bgData.numVectors(), cData.width());

    for (int r = 0; r < cData.totalLength(); r++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        for (int c = 0; c < cData.width(); c++) {
            int vecIdx = cData.getVectorIdx(c, r);
            Instance inst = new DenseInstance(cData.weight(vecIdx), cData.getVector(vecIdx));
            inst.setDataset(bagData);
            bagData.add(inst);
        }
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, r); // bag id
        newBag.setValue(2, 1); // class attribute
        newBag.setValue(1, value);
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }

    for (int i = 0; i < bgData.numVectors(); i++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        Instance inst = new DenseInstance(bgData.weight(i), bgData.getVector(i));
        inst.setDataset(bagData);
        bagData.add(inst);
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, cData.totalLength() + i);
        newBag.setValue(2, 0);
        newBag.setValue(1, value);
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }

    m_classifier.buildClassifier(m_data);
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Splits instances into subsets based on the given split.
 * /*w w w  .j a  v  a 2  s  .c o m*/
 * @param data
 *            the data to work with
 * @return the subsets of instances
 * @throws Exception
 *             if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {

    // Allocate array of Instances objects
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }

    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            double[] testPoint = new double[2];
            int ctr = 0;
            for (int k = 0; k < data.numInstances(); k++) {
                ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(data.instance(k));
                continue;
            }
        } else {
            Classifier fc;
            double predictedClass;
            // Go through the data
            for (int i = 0; i < data.numInstances(); i++) {

                // Get instance
                Instance inst = data.instance(i);
                String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
                fc = listOfFc.get(classifierId);
                predictedClass = fc.classifyInstance(inst);
                if (predictedClass != Instance.missingValue()) {
                    subsets[(int) predictedClass].add(inst);
                    continue;
                }

                // Else throw an exception
                throw new IllegalArgumentException("Unknown attribute type");
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {

            // Get instance
            Instance inst = data.instance(i);

            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {

                // Split instance up
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }

                // Proceed to next instance
                continue;
            }

            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);

                // Proceed to next instance
                continue;
            }

            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);

                // Proceed to next instance
                continue;
            }

            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }

    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }

    // Return the subsets
    return subsets;
}

From source file:org.wikipedia.miner.annotation.Disambiguator.java

License:Open Source License

@SuppressWarnings("unchecked")
private void weightTrainingInstances() {

    // First pass: count positive (valid sense, value 0) and negative examples.
    double positiveCount = 0;
    double negativeCount = 0;

    Enumeration<Instance> instEnum = trainingData.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.value(3) == 0) {
            positiveCount++;
        } else {
            negativeCount++;
        }
    }

    // Fraction of positive examples in the training data.
    double positiveFraction = positiveCount / (positiveCount + negativeCount);

    // Second pass: reweight so the positive and negative classes each
    // contribute half of the total instance weight.
    instEnum = trainingData.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        double newWeight = (inst.value(3) == 0) ? 0.5 * (1.0 / positiveFraction)
                : 0.5 * (1.0 / (1 - positiveFraction));
        inst.setWeight(newWeight);
    }

}

From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java

License:Open Source License

@SuppressWarnings("unchecked")
private void weightTrainingInstances() {

    // First pass: count positive (linked, value 0) and negative examples,
    // using the last attribute as the class indicator.
    double positiveCount = 0;
    double negativeCount = 0;
    final int classIndex = attributes.size() - 1;

    Enumeration<Instance> instEnum = trainingData.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = instEnum.nextElement();
        if (inst.value(classIndex) == 0) {
            positiveCount++;
        } else {
            negativeCount++;
        }
    }

    // Fraction of positive examples in the training data.
    double p = positiveCount / (positiveCount + negativeCount);

    System.out.println("stats: positive=" + p + ", negative=" + (1 - p));

    // Second pass: reweight so the positive and negative classes each
    // contribute half of the total instance weight.
    instEnum = trainingData.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = instEnum.nextElement();
        inst.setWeight(inst.value(classIndex) == 0 ? 0.5 * (1.0 / p) : 0.5 * (1.0 / (1 - p)));
    }
}