Example usage for weka.core Instance setWeight

List of usage examples for weka.core Instance setWeight

Introduction

This page collects example usages of the weka.core Instance setWeight method.

Prototype

public void setWeight(double weight);

Document

Sets the weight of an instance.
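Note that setWeight changes only the receiving instance; it does not renormalize the weights of the other instances in a dataset. A minimal, self-contained sketch of the call (relation and attribute names are illustrative only):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetWeightExample {
    public static void main(String[] args) {
        // A dataset with one numeric attribute; the names are placeholders.
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        Instances data = new Instances("example", attrs, 1);

        // DenseInstance(weight, attValues) -- the instance starts with weight 1.0.
        Instance inst = new DenseInstance(1.0, new double[] { 42.0 });
        inst.setDataset(data);

        // Double this instance's influence on weight-aware learners.
        inst.setWeight(2.0);
        data.add(inst); // add(...) stores a copy, which preserves the weight

        System.out.println(data.instance(0).weight()); // prints 2.0
    }
}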

Usage

From source file:Pair.java

License:Open Source License

private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying each instance as it is added
    // so that reweighting here does not affect the original dataset
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { //weight all equally
        weightSource = weightTarget = 1.0/*/numInstances*/;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight/*/numSourceInstances*/;
        weightTarget = 1.0 / totalWeight/*/numTargetInstances*/;
        initialSourceFraction = weightSource;
    }
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) {

            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; //[same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;
            //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {

        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {

            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }

            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }

            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();

            buildClassifierWithWeights(data);

            System.out.println("Iteration " + it + ":" + getTestError());

            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);

        }

    }

}
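A quick sanity check of the fraction-based reweighting above: each target instance keeps weight 1, so with sourceWeight = (f * numTargetInstances) / (numSourceInstances * (1 - f)) the source instances carry total weight f * numTargetInstances / (1 - f), and their share of the overall weight is (f * numTargetInstances / (1 - f)) / (f * numTargetInstances / (1 - f) + numTargetInstances) = f. The formula therefore pins the source share of the total weight at exactly sourceFraction, which the loop decays linearly toward zero over the iterations (capped at .995 to avoid degenerate weights).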

From source file:Pair.java

License:Open Source License

/**
 * Sets the weights for the next iteration.
 */
protected double setWeights(Instances trainData, Classifier cls, double sourceFraction, int numSourceInstances,
        boolean isFinal) throws Exception {

    Enumeration enu = trainData.enumerateInstances();
    double[] errors = new double[trainData.numInstances()];
    double max = 0;
    int i = 0;
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue());
        if (i >= numSourceInstances && errors[i] > max)
            max = errors[i];
        i++;
    }

    if (max == 0)
        return -1;

    //get avg loss
    double loss = 0;
    double initialTWeightSum = 0;
    double allWeightSum = 0;
    for (int j = 0; j < errors.length; j++) {
        errors[j] /= max;
        Instance instance = trainData.instance(j);
        loss += instance.weight() * errors[j];
        if (j >= numSourceInstances) {
            //loss += instance.weight() * errors[j];
            initialTWeightSum += instance.weight();
        }
        allWeightSum += instance.weight();
    }
    //loss /= weightSum;
    loss /= allWeightSum;

    targetWeight = initialTWeightSum / allWeightSum;
    /*
    if (!isFinal){
    System.out.println("Target weight: " + targetWeight);
    System.out.println("max: " + max);
    System.out.println("avg error: " + loss * max);
    System.out.println("Loss: " + loss);
    }
    */

    double beta;

    if (fixedBeta)
        beta = 0.4 / 0.6;
    else {
        if (isFinal && loss > 0.499) // a loss this high is bad, but cap it rather than quit
            //return -1;
            loss = 0.499; //since we're doing CV, no reason to quit

        beta = loss / (1 - loss); //or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2;
    }

    double tWeightSum = 0;
    if (!isFinal) {
        //need to find b so that weight of source be sourceFraction*num source
        //do binary search
        double goal = sourceFraction * errors.length;
        double bMin = .001;
        double bMax = .999;
        double b;
        double sourceSum = 0;
        while (bMax - bMin > .001) {
            b = (bMax + bMin) / 2;
            double sum = 0;
            for (int j = 0; j < numSourceInstances; j++) {
                Instance instance = trainData.instance(j);
                sum += Math.pow(b, errors[j]) * instance.weight();
            }
            if (sum > goal)
                bMax = b;
            else
                bMin = b;
        }
        b = (bMax + bMin) / 2;
        // reweight the source instances using the b found by the binary search
        for (int j = 0; j < numSourceInstances; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * Math.pow(b, errors[j]));
            sourceSum += instance.weight();
        }

        //now adjust target weights
        goal = errors.length - sourceSum;
        double m = goal / initialTWeightSum;

        for (int j = numSourceInstances; j < errors.length; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * m);
        }
    } else {//final
        if (!doUpsource) { //modify only target weights
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }

            double weightSumInverse = initialTWeightSum / tWeightSum;
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        } else { //modify all weights
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }

            double weightSumInverse = errors.length / tWeightSum;
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        }

    }

    return beta;
}
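The binary search in the non-final branch solves for a base b such that the reweighted source instances sum to goal = sourceFraction * errors.length, and the multiplier m then rescales the target instances so that the total weight mass stays at errors.length. In the final branch the AdaBoost.R2-style update beta^(-error) is applied instead, followed by a rescaling that restores the affected instances to their previous total weight.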

From source file:boostingPL.boosting.AdaBoost.java

License:Open Source License

public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    //classifiers[t] = ClassifiersHelper.newInstance("C4.5");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    if (e >= 0.5) {
        System.out.println("AdaBoost Error: error rate = " + e + ", >= 0.5");
        throw new Exception("error rate >= 0.5");
    }

    if (e == 0.0) {
        e = 0.0001; // don't let e == 0
    }
    cweights[t] = 0.5 * Math.log((1 - e) / e); // Math.log is already the natural log
    System.out.println("Round = " + t + "\t ErrorRate = " + e + "\t\t Weights = " + cweights[t]);

    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() / (2 * e));
        } else {
            inst.setWeight(inst.weight() / (2 * (1 - e)));
        }
    }
}
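Assuming the instance weights sum to 1 going in, this update is self-normalizing: the misclassified instances hold total weight e, so dividing them by 2e leaves them with mass 0.5, and the correctly classified instances likewise end up with mass (1 - e) / (2 * (1 - e)) = 0.5. A standalone arithmetic check (not part of the source):

double e = 0.25;                            // weighted error rate
double wrongMass = e / (2 * e);             // mass on mistakes after the update
double rightMass = (1 - e) / (2 * (1 - e)); // mass on correct instances
System.out.println(wrongMass + rightMass);  // 1.0 -- total weight preserved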

From source file:boostingPL.boosting.SAMME.java

License:Open Source License

public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    final int numClasses = insts.classAttribute().numValues();
    double maxe = 1 - 1.0 / numClasses;
    if (e >= maxe) {
        System.out.println("SAMME Error: error rate = " + e + ", >= " + maxe);
        throw new Exception("error rate >= " + maxe);
    }

    if (e == 0.0) {
        e = 0.0001; // don't let e == 0
    }
    cweights[t] = Math.log((1 - e) / e) + Math.log(numClasses - 1);
    System.out.println("Round = " + t + "\tErrorRate = " + e + "\tCWeight = " + cweights[t]);

    double expCWeight = Math.exp(cweights[t]);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() * expCWeight);
        }
    }

    double weightSum = insts.sumOfWeights();
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        inst.setWeight(inst.weight() / weightSum);
    }

}
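Unlike the AdaBoost.M1-style update above, SAMME scales only the misclassified instances (by exp(cweights[t])) and then renormalizes explicitly, dividing every weight by sumOfWeights so the weights again sum to 1. The extra Math.log(numClasses - 1) term in the classifier weight is what relaxes the admissible error rate from 0.5 to 1 - 1/numClasses in the multi-class case.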

From source file:distributedRedditAnalyser.OzaBoost.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    try {
        lock.acquire();
        //Get a new classifier
        Classifier newClassifier = ((Classifier) getPreparedClassOption(this.baseLearnerOption)).copy();
        ensemble.add(new ClassifierInstance(newClassifier));

        //If we have too many classifiers
        while (ensemble.size() > ensembleSizeOption.getValue())
            ensemble.pollFirst();

        double lambda_d = 1.0;
        for (ClassifierInstance c : ensemble) {
            double k = this.pureBoostOption.isSet() ? lambda_d
                    : MiscUtils.poisson(lambda_d, this.classifierRandom);
            if (k > 0.0) {
                Instance weightedInst = (Instance) inst.copy();
                weightedInst.setWeight(inst.weight() * k);
                c.getClassifier().trainOnInstance(weightedInst);
            }
            if (c.getClassifier().correctlyClassifies(inst)) {
                c.setScms(c.getScms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getScms());
            } else {
                c.setSwms(c.getSwms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getSwms());
            }
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    } finally {
        lock.release();
    }
}
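Here setWeight implements the Poisson trick of online boosting: a data stream cannot be resampled, so each ensemble member instead trains on a copy of the incoming instance with weight inst.weight() * k, where k is drawn from Poisson(lambda_d). In expectation this presents the instance lambda_d times, approximating weighted resampling, while lambda_d itself grows after a misclassification and shrinks after a correct one.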

From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java

License:Open Source License

/**
 * Converts a feature set object to a weka Instances object.
 * <p/>
 * Uses weka's instance weighting capability to assign a weight to each data point.
 *
 * @param feature_set the feature set to convert
 * @param fn          a weight function
 * @return a weka instances object
 */
public static Instances convertFeatureSetToWeightedWekaInstances(FeatureSet feature_set, WeightFunction fn) {
    ArrayList<Attribute> attributes = generateWekaAttributes(feature_set.getFeatures());
    Instances instances = new Instances("AuToBI_feature_set", attributes, feature_set.getDataPoints().size());
    for (Word w : feature_set.getDataPoints()) {
        Instance inst = ClassifierUtils.assignWekaAttributes(instances, w);
        inst.setWeight(fn.weight(w));
        instances.add(inst);
    }

    ClassifierUtils.setWekaClassAttribute(instances, feature_set.getClassAttribute());
    return instances;
}
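One detail worth noting: Instances.add stores a copy of the instance, so the weight has to be set before the add (as above) or applied to the stored copy afterwards, e.g.:

instances.add(inst);
instances.lastInstance().setWeight(fn.weight(w)); // equivalent alternative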

From source file:gyc.OverBoostM1.java

License:Open Source License

/**
 * Sets the weights for the next iteration.
 *
 * @param training the training instances
 * @param reweight the reweighting factor
 * @throws Exception if something goes wrong
 */
protected void setWeights(Instances training, double reweight) throws Exception {

    double oldSumOfWeights, newSumOfWeights;

    oldSumOfWeights = training.sumOfWeights();
    Enumeration enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        if (!Utils.eq(m_Classifiers[m_NumIterationsPerformed].classifyInstance(instance),
                instance.classValue()))
            instance.setWeight(instance.weight() * reweight);
    }

    // Renormalize weights
    newSumOfWeights = training.sumOfWeights();
    enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        instance.setWeight(instance.weight() * oldSumOfWeights / newSumOfWeights);
    }
}
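The second pass keeps the total weight constant: after the misclassified instances are scaled by reweight, every instance is multiplied by oldSumOfWeights / newSumOfWeights, so training.sumOfWeights() is the same before and after the call. This matters because weight-aware weka learners treat the total weight as the effective sample size.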

From source file:j48.GraftSplit.java

License:Open Source License

/**
 * builds m_graftdistro using the passed data
 *
 * @param data the instances to use when creating the distribution
 */
public void buildClassifier(Instances data) throws Exception {

    // distribution for the graft, not counting cases in atbop, only orig leaf
    m_graftdistro = new Distribution(2, data.numClasses());

    // which subset are we looking at for the graft?
    int subset = subsetOfInterest(); // this is the subset for m_leaf

    double thisNodeCount = 0;
    double knownCases = 0;
    boolean allKnown = true;
    // populate distribution
    for (int x = 0; x < data.numInstances(); x++) {
        Instance instance = data.instance(x);
        if (instance.isMissing(m_attIndex)) {
            allKnown = false;
            continue;
        }
        knownCases += instance.weight();
        int subst = whichSubset(instance);
        if (subst == -1)
            continue;
        m_graftdistro.add(subst, instance);
        if (subst == subset) { // instance belongs at m_leaf
            thisNodeCount += instance.weight();
        }
    }
    double factor = (knownCases == 0) ? 0.5 : (thisNodeCount / knownCases);
    if (!allKnown) {
        for (int x = 0; x < data.numInstances(); x++) {
            if (data.instance(x).isMissing(m_attIndex)) {
                Instance instance = data.instance(x);
                int subst = whichSubset(instance);
                if (subst == -1)
                    continue;
                instance.setWeight(instance.weight() * factor);
                m_graftdistro.add(subst, instance);
            }
        }
    }

    // if there are no cases at the leaf, make sure the desired
    // class is chosen, by setting counts to 0.01
    if (m_graftdistro.perBag(subset) == 0) {
        double[] counts = new double[data.numClasses()];
        counts[m_maxClass] = 0.01;
        m_graftdistro.add(subset, counts);
    }
    if (m_graftdistro.perBag((subset == 0) ? 1 : 0) == 0) {
        double[] counts = new double[data.numClasses()];
        counts[(int) m_otherLeafMaxClass] = 0.01;
        m_graftdistro.add((subset == 0) ? 1 : 0, counts);
    }
}
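The factor applied to the missing-value instances is the C4.5-style fractional treatment: an instance whose split attribute is missing has its weight scaled by the proportion of known-value weight that reached the subset of interest (defaulting to 0.5 when no values are known) before it is counted in the graft distribution.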

From source file:j48.NBTreeSplit.java

License:Open Source License

/**
 * Creates split on enumerated attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleEnumeratedAttribute(Instances trainInstances) throws Exception {

    m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
    m_c45S.buildClassifier(trainInstances);
    if (m_c45S.numSubsets() == 0) {
        return;
    }
    m_errors = 0;
    Instance instance;

    Instances[] trainingSets = new Instances[m_complexityIndex];
    for (int i = 0; i < m_complexityIndex; i++) {
        trainingSets[i] = new Instances(trainInstances, 0);
    }
    /*    m_distribution = new Distribution(m_complexityIndex,
     trainInstances.numClasses()); */
    int subset;
    for (int i = 0; i < trainInstances.numInstances(); i++) {
        instance = trainInstances.instance(i);
        subset = m_c45S.whichSubset(instance);
        if (subset > -1) {
            trainingSets[subset].add((Instance) instance.copy());
        } else {
            double[] weights = m_c45S.weights(instance);
            for (int j = 0; j < m_complexityIndex; j++) {
                try {
                    Instance temp = (Instance) instance.copy();
                    if (weights.length == m_complexityIndex) {
                        temp.setWeight(temp.weight() * weights[j]);
                    } else {
                        temp.setWeight(temp.weight() / m_complexityIndex);
                    }
                    trainingSets[j].add(temp);
                } catch (Exception ex) {
                    ex.printStackTrace();
                    System.err.println("*** " + m_complexityIndex);
                    System.err.println(weights.length);
                    System.exit(1);
                }
            }
        }
    }

    /*    // compute weights (weights of instances per subset
    m_weights = new double [m_complexityIndex];
    for (int i = 0; i < m_complexityIndex; i++) {
      m_weights[i] = trainingSets[i].sumOfWeights();
    }
    Utils.normalize(m_weights); */

    /*
    // Only Instances with known values are relevant.
    Enumeration enu = trainInstances.enumerateInstances();
    while (enu.hasMoreElements()) {
      instance = (Instance) enu.nextElement();
      if (!instance.isMissing(m_attIndex)) {
    //   m_distribution.add((int)instance.value(m_attIndex),instance);
    trainingSets[(int)instances.value(m_attIndex)].add(instance);
      } else {
    // add these to the error count
    m_errors += instance.weight();
      }
      } */

    Random r = new Random(1);
    int minNumCount = 0;
    for (int i = 0; i < m_complexityIndex; i++) {
        if (trainingSets[i].numInstances() >= 5) {
            minNumCount++;
            // Discretize the sets
            Discretize disc = new Discretize();
            disc.setInputFormat(trainingSets[i]);
            trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

            trainingSets[i].randomize(r);
            trainingSets[i].stratify(5);
            NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
            fullModel.buildClassifier(trainingSets[i]);

            // add the errors for this branch of the split
            m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
        } else {
            // if fewer than min obj then just count them as errors
            for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                m_errors += trainingSets[i].instance(j).weight();
            }
        }
    }

    // Check that at least two of the subsets contain at least five instances.
    if (minNumCount > 1) {
        m_numSubsets = m_complexityIndex;
    }
}
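Both this method and the numeric variant below handle an instance whose subset is unknown the same way: the instance is copied into every branch, with its weight split proportionally when m_c45S.weights(instance) supplies one fraction per branch and evenly (1 / m_complexityIndex) otherwise, so the total weight mass is preserved across the split.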

From source file:j48.NBTreeSplit.java

License:Open Source License

/**
 * Creates split on numeric attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleNumericAttribute(Instances trainInstances) throws Exception {

    m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
    m_c45S.buildClassifier(trainInstances);
    if (m_c45S.numSubsets() == 0) {
        return;
    }
    m_errors = 0;

    Instances[] trainingSets = new Instances[m_complexityIndex];
    trainingSets[0] = new Instances(trainInstances, 0);
    trainingSets[1] = new Instances(trainInstances, 0);
    int subset = -1;

    // populate the subsets
    for (int i = 0; i < trainInstances.numInstances(); i++) {
        Instance instance = trainInstances.instance(i);
        subset = m_c45S.whichSubset(instance);
        if (subset != -1) {
            trainingSets[subset].add((Instance) instance.copy());
        } else {
            double[] weights = m_c45S.weights(instance);
            for (int j = 0; j < m_complexityIndex; j++) {
                Instance temp = (Instance) instance.copy();
                if (weights.length == m_complexityIndex) {
                    temp.setWeight(temp.weight() * weights[j]);
                } else {
                    temp.setWeight(temp.weight() / m_complexityIndex);
                }
                trainingSets[j].add(temp);
            }
        }
    }

    /*    // compute weights (weights of instances per subset
    m_weights = new double [m_complexityIndex];
    for (int i = 0; i < m_complexityIndex; i++) {
      m_weights[i] = trainingSets[i].sumOfWeights();
    }
    Utils.normalize(m_weights); */

    Random r = new Random(1);
    int minNumCount = 0;
    for (int i = 0; i < m_complexityIndex; i++) {
        if (trainingSets[i].numInstances() > 5) {
            minNumCount++;
            // Discretize the sets
            Discretize disc = new Discretize();
            disc.setInputFormat(trainingSets[i]);
            trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

            trainingSets[i].randomize(r);
            trainingSets[i].stratify(5);
            NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
            fullModel.buildClassifier(trainingSets[i]);

            // add the errors for this branch of the split
            m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
        } else {
            for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                m_errors += trainingSets[i].instance(j).weight();
            }
        }
    }

    // Check that at least two subsets contain the minimum number of instances.
    if (minNumCount > 1) {
        m_numSubsets = m_complexityIndex;
    }
}