List of usage examples for weka.core.Instance.setWeight
public void setWeight(double weight);
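Every example on this page follows the same read-scale-write pattern: get the current weight with weight(), adjust it, and store it back with setWeight(double). Here is a minimal self-contained sketch of that pattern (the relation name "demo", the single attribute, and the class name are illustrative only, not taken from any example below):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetWeightSketch {
    public static void main(String[] args) {
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        Instances data = new Instances("demo", attrs, 3);
        for (int i = 0; i < 3; i++) {
            // DenseInstance(weight, attValues): each instance starts at weight 1.0
            data.add(new DenseInstance(1.0, new double[] { i }));
        }
        data.instance(0).setWeight(2.0); // up-weight the first instance

        // Renormalize so the total weight equals the number of instances,
        // mirroring the pattern used by several boosting examples below.
        double sum = data.sumOfWeights(); // 4.0 at this point
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            inst.setWeight(inst.weight() * data.numInstances() / sum);
        }
        System.out.println(data.sumOfWeights()); // back to 3.0
    }
}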
From source file:Pair.java
License:Open Source License
private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { // weight all equally
        weightSource = weightTarget = 1.0 /* /numInstances */;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight /* /numSourceInstances */;
        weightTarget = 1.0 / totalWeight /* /numTargetInstances */;
        initialSourceFraction = weightSource;
    }
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations /* m_NumIterations */; it++) {
            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; // [same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;
            // double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {
        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {
            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }
            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }
            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);
            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);
        }
    }
}
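A note on the arithmetic above: with sourceRatio = r >= 0, each source instance starts at weight r/(1+r) and each target instance at 1/(1+r), so for r = 3 the weights are 0.75 and 0.25 and initialSourceFraction = 0.75. Inside the doFraction loop, sourceWeight = f * numTargetInstances / (numSourceInstances * (1 - f)) is chosen so that, with every target weight at 1, the source instances carry exactly the fraction f of the total weight: their sum is f * numTargetInstances / (1 - f), and the grand total is numTargetInstances / (1 - f).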
From source file:Pair.java
License:Open Source License
/**
 * Sets the weights for the next iteration.
 */
protected double setWeights(Instances trainData, Classifier cls, double sourceFraction,
        int numSourceInstances, boolean isFinal) throws Exception {
    Enumeration enu = trainData.enumerateInstances();
    int instNum = 0;
    double[] errors = new double[trainData.numInstances()];
    double max = 0;
    int i = 0;
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue());
        if (i >= numSourceInstances && errors[i] > max)
            max = errors[i];
        i++;
    }

    if (max == 0)
        return -1;

    // get avg loss
    double loss = 0;
    double initialTWeightSum = 0;
    double allWeightSum = 0;
    for (int j = 0; j < errors.length; j++) {
        errors[j] /= max;
        Instance instance = trainData.instance(j);
        loss += instance.weight() * errors[j];
        if (j >= numSourceInstances) {
            // loss += instance.weight() * errors[j];
            initialTWeightSum += instance.weight();
        }
        allWeightSum += instance.weight();
    }
    // loss /= weightSum;
    loss /= allWeightSum;

    targetWeight = initialTWeightSum / allWeightSum;
    /*
     * if (!isFinal) {
     *     System.out.println("Target weight: " + targetWeight);
     *     System.out.println("max: " + max);
     *     System.out.println("avg error: " + loss * max);
     *     System.out.println("Loss: " + loss);
     * }
     */

    double beta;
    if (fixedBeta)
        beta = 0.4 / 0.6;
    else {
        if (isFinal && loss > 0.499) // bad, so quit
            // return -1;
            loss = 0.499; // since we're doing CV, no reason to quit
        beta = loss / (1 - loss); // or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2
    }

    double tWeightSum = 0;
    if (!isFinal) {
        // need to find b so that weight of source be sourceFraction*num source
        // do binary search
        double goal = sourceFraction * errors.length;
        double bMin = .001;
        double bMax = .999;
        double b;
        double sourceSum = 0;
        while (bMax - bMin > .001) {
            b = (bMax + bMin) / 2;
            double sum = 0;
            for (int j = 0; j < numSourceInstances; j++) {
                Instance instance = trainData.instance(j);
                sum += Math.pow(b, errors[j]) * instance.weight();
            }
            if (sum > goal)
                bMax = b;
            else
                bMin = b;
        }
        b = (bMax + bMin) / 2;
        // System.out.println(b);
        for (int j = 0; j < numSourceInstances; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * Math.pow(bMin, errors[j]));
            sourceSum += instance.weight();
        }

        // now adjust target weights
        goal = errors.length - sourceSum;
        double m = goal / initialTWeightSum;
        for (int j = numSourceInstances; j < errors.length; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * m);
        }
    } else { // final
        if (!doUpsource) { // modify only target weights
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = initialTWeightSum / tWeightSum;
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        } else { // modify all weights
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = errors.length / tWeightSum;
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        }
    }
    return beta;
}
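This method is the AdaBoost.R2-style update that the code's own comments allude to: errors are absolute prediction errors normalized by the largest target error, and beta = loss / (1 - loss). In the non-final branch, the binary search finds a decay base b such that the source weights, each multiplied by b^error, sum to sourceFraction * errors.length (the total weight is kept at one unit per instance); the factor m then rescales the target weights so the grand total returns to errors.length.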
From source file:boostingPL.boosting.AdaBoost.java
License:Open Source License
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    //classifiers[t] = ClassifiersHelper.newInstance("C4.5");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    if (e >= 0.5) {
        System.out.println("AdaBoost Error: error rate = " + e + ", >= 0.5");
        throw new Exception("error rate > 0.5");
    }
    if (e == 0.0) {
        e = 0.0001; // don't let e == 0
    }

    cweights[t] = 0.5 * Math.log((1 - e) / e) / Math.log(Math.E);
    System.out.println("Round = " + t + "\t ErrorRate = " + e + "\t\t Weights = " + cweights[t]);

    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() / (2 * e));
        } else {
            inst.setWeight(inst.weight() / (2 * (1 - e)));
        }
    }
}
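This is the classic two-class AdaBoost reweighting arranged so that the total weight stays constant with no renormalization pass: the misclassified mass e is scaled by 1/(2e) to 1/2, and the correctly classified mass 1 - e is scaled by 1/(2(1 - e)) to the other 1/2. (The trailing division by Math.log(Math.E) is a no-op, since ln e = 1.)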
From source file:boostingPL.boosting.SAMME.java
License:Open Source License
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    final int numClasses = insts.classAttribute().numValues();
    double maxe = 1 - 1.0 / numClasses;
    if (e >= maxe) {
        System.out.println("SAMME Error: error rate = " + e + ", >= " + maxe);
        throw new Exception("error rate > " + maxe);
    }
    if (e == 0.0) {
        e = 0.0001; // don't let e == 0
    }

    cweights[t] = Math.log((1 - e) / e) + Math.log(numClasses - 1);
    System.out.println("Round = " + t + "\tErrorRate = " + e + "\tCWeight = " + cweights[t]);

    double expCWeight = Math.exp(cweights[t]);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() * expCWeight);
        }
    }

    double weightSum = insts.sumOfWeights();
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        inst.setWeight(inst.weight() / weightSum);
    }
}
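SAMME generalizes AdaBoost to K classes: the classifier weight gains a log(numClasses - 1) term, and the weak learner only has to beat random guessing (error below 1 - 1/K) rather than 0.5. For example, with K = 3 and e = 0.25, cweights[t] = ln(0.75/0.25) + ln 2, which is about 1.10 + 0.69 = 1.79. Unlike the previous example, only misclassified instances are scaled (by exp(cweights[t])), and a second pass divides every weight by sumOfWeights() so the weights sum to one.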
From source file:distributedRedditAnalyser.OzaBoost.java
License:Open Source License
@Override
public void trainOnInstanceImpl(Instance inst) {
    try {
        lock.acquire();

        // Get a new classifier
        Classifier newClassifier = ((Classifier) getPreparedClassOption(this.baseLearnerOption)).copy();
        ensemble.add(new ClassifierInstance(newClassifier));

        // If we have too many classifiers
        while (ensemble.size() > ensembleSizeOption.getValue())
            ensemble.pollFirst();

        double lambda_d = 1.0;
        for (ClassifierInstance c : ensemble) {
            double k = this.pureBoostOption.isSet() ? lambda_d
                    : MiscUtils.poisson(lambda_d, this.classifierRandom);
            if (k > 0.0) {
                Instance weightedInst = (Instance) inst.copy();
                weightedInst.setWeight(inst.weight() * k);
                c.getClassifier().trainOnInstance(weightedInst);
            }
            if (c.getClassifier().correctlyClassifies(inst)) {
                c.setScms(c.getScms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getScms());
            } else {
                c.setSwms(c.getSwms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getSwms());
            }
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    } finally {
        lock.release();
    }
}
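OzaBoost approximates bootstrap resampling on a stream by drawing k ~ Poisson(lambda) and training each base learner on a copy of the instance weighted by inst.weight() * k. MiscUtils.poisson here comes from MOA; if you want that behavior without the dependency, a plain inverse-transform sketch looks like this (an illustration, not MOA's exact implementation):

import java.util.Random;

public class PoissonSketch {
    // Inverse-transform sampling of k ~ Poisson(lambda): walk the CDF until it
    // passes a uniform draw. Fine for the small lambda values used in online
    // bagging/boosting; exp(-lambda) underflows for very large lambda.
    static int poisson(double lambda, Random r) {
        double p = Math.exp(-lambda); // P(k = 0)
        double cdf = p;
        double u = r.nextDouble();
        int k = 0;
        while (u > cdf) {
            k++;
            p *= lambda / k; // P(k) = P(k - 1) * lambda / k
            cdf += p;
        }
        return k;
    }

    public static void main(String[] args) {
        Random r = new Random(42);
        // A base learner would train on the instance with weight w * k.
        System.out.println(poisson(1.0, r));
    }
}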
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/**
 * Converts a feature set object to a weka Instances object.
 * <p/>
 * Uses weka's instance weighting capability to assign weights for each data point.
 *
 * @param feature_set the feature set to convert
 * @param fn          a weight function
 * @return a weka instances object
 */
public static Instances convertFeatureSetToWeightedWekaInstances(FeatureSet feature_set, WeightFunction fn) {
    ArrayList<Attribute> attributes = generateWekaAttributes(feature_set.getFeatures());
    Instances instances = new Instances("AuToBI_feature_set", attributes, feature_set.getDataPoints().size());
    for (Word w : feature_set.getDataPoints()) {
        Instance inst = ClassifierUtils.assignWekaAttributes(instances, w);
        inst.setWeight(fn.weight(w));
        instances.add(inst);
    }
    ClassifierUtils.setWekaClassAttribute(instances, feature_set.getClassAttribute());
    return instances;
}
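Note the ordering here: the weight is set on inst before instances.add(inst). Instances.add stores a copy of the instance, so a setWeight call made after add would not affect the stored copy; you would have to reach it through instances.instance(i) instead.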
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Sets the weights for the next iteration.
 *
 * @param training the training instances
 * @param reweight the reweighting factor
 * @throws Exception if something goes wrong
 */
protected void setWeights(Instances training, double reweight) throws Exception {
    double oldSumOfWeights, newSumOfWeights;

    oldSumOfWeights = training.sumOfWeights();
    Enumeration enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        if (!Utils.eq(m_Classifiers[m_NumIterationsPerformed].classifyInstance(instance),
                instance.classValue()))
            instance.setWeight(instance.weight() * reweight);
    }

    // Renormalize weights
    newSumOfWeights = training.sumOfWeights();
    enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        instance.setWeight(instance.weight() * oldSumOfWeights / newSumOfWeights);
    }
}
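The two passes conserve the total weight: the first scales misclassified instances by reweight, which changes the sum, and the second multiplies every weight by oldSumOfWeights / newSumOfWeights to restore it. For example, if the sum starts at 100 and the first pass raises it to 125, every weight is then scaled by 100/125 = 0.8.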
From source file:j48.GraftSplit.java
License:Open Source License
/**
 * builds m_graftdistro using the passed data
 *
 * @param data the instances to use when creating the distribution
 */
public void buildClassifier(Instances data) throws Exception {
    // distribution for the graft, not counting cases in atbop, only orig leaf
    m_graftdistro = new Distribution(2, data.numClasses());

    // which subset are we looking at for the graft?
    int subset = subsetOfInterest(); // this is the subset for m_leaf
    double thisNodeCount = 0;
    double knownCases = 0;
    boolean allKnown = true;

    // populate distribution
    for (int x = 0; x < data.numInstances(); x++) {
        Instance instance = data.instance(x);
        if (instance.isMissing(m_attIndex)) {
            allKnown = false;
            continue;
        }
        knownCases += instance.weight();
        int subst = whichSubset(instance);
        if (subst == -1)
            continue;
        m_graftdistro.add(subst, instance);
        if (subst == subset) { // instance belongs at m_leaf
            thisNodeCount += instance.weight();
        }
    }

    double factor = (knownCases == 0) ? (1.0 / 2.0) : (thisNodeCount / knownCases);
    if (!allKnown) {
        for (int x = 0; x < data.numInstances(); x++) {
            if (data.instance(x).isMissing(m_attIndex)) {
                Instance instance = data.instance(x);
                int subst = whichSubset(instance);
                if (subst == -1)
                    continue;
                instance.setWeight(instance.weight() * factor);
                m_graftdistro.add(subst, instance);
            }
        }
    }

    // if there are no cases at the leaf, make sure the desired
    // class is chosen, by setting counts to 0.01
    if (m_graftdistro.perBag(subset) == 0) {
        double[] counts = new double[data.numClasses()];
        counts[m_maxClass] = 0.01;
        m_graftdistro.add(subset, counts);
    }
    if (m_graftdistro.perBag((subset == 0) ? 1 : 0) == 0) {
        double[] counts = new double[data.numClasses()];
        counts[(int) m_otherLeafMaxClass] = 0.01;
        m_graftdistro.add((subset == 0) ? 1 : 0, counts);
    }
}
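Here setWeight implements C4.5-style fractional instances for missing values: an instance whose split attribute is missing is not dropped, but is added to the distribution with its weight scaled by factor, the proportion of known-value weight that reached the subset of interest (falling back to 1/2 when no values are known at all).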
From source file:j48.NBTreeSplit.java
License:Open Source License
/**
 * Creates split on enumerated attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleEnumeratedAttribute(Instances trainInstances) throws Exception {
    m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
    m_c45S.buildClassifier(trainInstances);
    if (m_c45S.numSubsets() == 0) {
        return;
    }
    m_errors = 0;
    Instance instance;

    Instances[] trainingSets = new Instances[m_complexityIndex];
    for (int i = 0; i < m_complexityIndex; i++) {
        trainingSets[i] = new Instances(trainInstances, 0);
    }
    /*
     * m_distribution = new Distribution(m_complexityIndex,
     *     trainInstances.numClasses());
     */
    int subset;
    for (int i = 0; i < trainInstances.numInstances(); i++) {
        instance = trainInstances.instance(i);
        subset = m_c45S.whichSubset(instance);
        if (subset > -1) {
            trainingSets[subset].add((Instance) instance.copy());
        } else {
            double[] weights = m_c45S.weights(instance);
            for (int j = 0; j < m_complexityIndex; j++) {
                try {
                    Instance temp = (Instance) instance.copy();
                    if (weights.length == m_complexityIndex) {
                        temp.setWeight(temp.weight() * weights[j]);
                    } else {
                        temp.setWeight(temp.weight() / m_complexityIndex);
                    }
                    trainingSets[j].add(temp);
                } catch (Exception ex) {
                    ex.printStackTrace();
                    System.err.println("*** " + m_complexityIndex);
                    System.err.println(weights.length);
                    System.exit(1);
                }
            }
        }
    }

    /*
     * // compute weights (weights of instances per subset)
     * m_weights = new double[m_complexityIndex];
     * for (int i = 0; i < m_complexityIndex; i++) {
     *     m_weights[i] = trainingSets[i].sumOfWeights();
     * }
     * Utils.normalize(m_weights);
     */

    /*
     * // Only Instances with known values are relevant.
     * Enumeration enu = trainInstances.enumerateInstances();
     * while (enu.hasMoreElements()) {
     *     instance = (Instance) enu.nextElement();
     *     if (!instance.isMissing(m_attIndex)) {
     *         // m_distribution.add((int) instance.value(m_attIndex), instance);
     *         trainingSets[(int) instances.value(m_attIndex)].add(instance);
     *     } else {
     *         // add these to the error count
     *         m_errors += instance.weight();
     *     }
     * }
     */

    Random r = new Random(1);
    int minNumCount = 0;
    for (int i = 0; i < m_complexityIndex; i++) {
        if (trainingSets[i].numInstances() >= 5) {
            minNumCount++;
            // Discretize the sets
            Discretize disc = new Discretize();
            disc.setInputFormat(trainingSets[i]);
            trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

            trainingSets[i].randomize(r);
            trainingSets[i].stratify(5);
            NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
            fullModel.buildClassifier(trainingSets[i]);

            // add the errors for this branch of the split
            m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
        } else {
            // if fewer than min obj then just count them as errors
            for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                m_errors += trainingSets[i].instance(j).weight();
            }
        }
    }

    // Check if there are at least five instances in at least two of the subsets.
    if (minNumCount > 1) {
        m_numSubsets = m_complexityIndex;
    }
}
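When whichSubset returns -1, the split attribute value is missing, so the instance is copied into every branch with its weight multiplied by weights[j], that branch's share of the training weight (or split evenly when the weights array does not match the number of branches); each branch's weighted copies then count toward its cross-validated Naive Bayes error. handleNumericAttribute below follows the same pattern.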
From source file:j48.NBTreeSplit.java
License:Open Source License
/**
 * Creates split on numeric attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleNumericAttribute(Instances trainInstances) throws Exception {
    m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
    m_c45S.buildClassifier(trainInstances);
    if (m_c45S.numSubsets() == 0) {
        return;
    }
    m_errors = 0;

    Instances[] trainingSets = new Instances[m_complexityIndex];
    trainingSets[0] = new Instances(trainInstances, 0);
    trainingSets[1] = new Instances(trainInstances, 0);
    int subset = -1;

    // populate the subsets
    for (int i = 0; i < trainInstances.numInstances(); i++) {
        Instance instance = trainInstances.instance(i);
        subset = m_c45S.whichSubset(instance);
        if (subset != -1) {
            trainingSets[subset].add((Instance) instance.copy());
        } else {
            double[] weights = m_c45S.weights(instance);
            for (int j = 0; j < m_complexityIndex; j++) {
                Instance temp = (Instance) instance.copy();
                if (weights.length == m_complexityIndex) {
                    temp.setWeight(temp.weight() * weights[j]);
                } else {
                    temp.setWeight(temp.weight() / m_complexityIndex);
                }
                trainingSets[j].add(temp);
            }
        }
    }

    /*
     * // compute weights (weights of instances per subset)
     * m_weights = new double[m_complexityIndex];
     * for (int i = 0; i < m_complexityIndex; i++) {
     *     m_weights[i] = trainingSets[i].sumOfWeights();
     * }
     * Utils.normalize(m_weights);
     */

    Random r = new Random(1);
    int minNumCount = 0;
    for (int i = 0; i < m_complexityIndex; i++) {
        if (trainingSets[i].numInstances() > 5) {
            minNumCount++;
            // Discretize the sets
            Discretize disc = new Discretize();
            disc.setInputFormat(trainingSets[i]);
            trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

            trainingSets[i].randomize(r);
            trainingSets[i].stratify(5);
            NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
            fullModel.buildClassifier(trainingSets[i]);

            // add the errors for this branch of the split
            m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
        } else {
            for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                m_errors += trainingSets[i].instance(j).weight();
            }
        }
    }

    // Check if minimum number of Instances in at least two subsets.
    if (minNumCount > 1) {
        m_numSubsets = m_complexityIndex;
    }
}