List of usage examples for weka.core.Instance.setWeight
public void setWeight(double weight);
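Every example on this page follows the same read-scale-write pattern: get the current weight with weight(), adjust it, and store it back with setWeight(double). Here is a minimal self-contained sketch of that pattern (the relation name "demo", the single attribute, and the class name are illustrative only, not taken from any example below):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetWeightSketch {
    public static void main(String[] args) {
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        Instances data = new Instances("demo", attrs, 3);
        for (int i = 0; i < 3; i++) {
            // DenseInstance(weight, attValues): each instance starts at weight 1.0
            data.add(new DenseInstance(1.0, new double[] { i }));
        }
        data.instance(0).setWeight(2.0); // up-weight the first instance

        // Renormalize so the total weight equals the number of instances,
        // mirroring the pattern used by several boosting examples below.
        double sum = data.sumOfWeights(); // 4.0 at this point
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            inst.setWeight(inst.weight() * data.numInstances() / sum);
        }
        System.out.println(data.sumOfWeights()); // back to 3.0
    }
}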
From source file:Pair.java
License:Open Source License
private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { // weight all equally
        weightSource = weightTarget = 1.0 /* /numInstances */;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight /* /numSourceInstances */;
        weightTarget = 1.0 / totalWeight /* /numTargetInstances */;
        initialSourceFraction = weightSource;
    }
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations /* m_NumIterations */; it++) {
            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; // [same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;
            // double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {
        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {
            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }
            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }
            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);
            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);
        }
    }
}
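A note on the arithmetic above: with sourceRatio = r >= 0, each source instance starts at weight r/(1+r) and each target instance at 1/(1+r), so for r = 3 the weights are 0.75 and 0.25 and initialSourceFraction = 0.75. Inside the doFraction loop, sourceWeight = f * numTargetInstances / (numSourceInstances * (1 - f)) is chosen so that, with every target weight at 1, the source instances carry exactly the fraction f of the total weight: their sum is f * numTargetInstances / (1 - f), and the grand total is numTargetInstances / (1 - f).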
From source file:Pair.java
License:Open Source License
/**
 * Sets the weights for the next iteration.
 */
protected double setWeights(Instances trainData, Classifier cls, double sourceFraction,
        int numSourceInstances, boolean isFinal) throws Exception {
    Enumeration enu = trainData.enumerateInstances();
    int instNum = 0;
    double[] errors = new double[trainData.numInstances()];
    double max = 0;
    int i = 0;
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue());
        if (i >= numSourceInstances && errors[i] > max)
            max = errors[i];
        i++;
    }

    if (max == 0)
        return -1;

    // get avg loss
    double loss = 0;
    double initialTWeightSum = 0;
    double allWeightSum = 0;
    for (int j = 0; j < errors.length; j++) {
        errors[j] /= max;
        Instance instance = trainData.instance(j);
        loss += instance.weight() * errors[j];
        if (j >= numSourceInstances) {
            // loss += instance.weight() * errors[j];
            initialTWeightSum += instance.weight();
        }
        allWeightSum += instance.weight();
    }
    // loss /= weightSum;
    loss /= allWeightSum;

    targetWeight = initialTWeightSum / allWeightSum;
    /*
     * if (!isFinal) {
     *     System.out.println("Target weight: " + targetWeight);
     *     System.out.println("max: " + max);
     *     System.out.println("avg error: " + loss * max);
     *     System.out.println("Loss: " + loss);
     * }
     */

    double beta;
    if (fixedBeta)
        beta = 0.4 / 0.6;
    else {
        if (isFinal && loss > 0.499) // bad, so quit
            // return -1;
            loss = 0.499; // since we're doing CV, no reason to quit
        beta = loss / (1 - loss); // or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2
    }

    double tWeightSum = 0;
    if (!isFinal) {
        // need to find b so that weight of source be sourceFraction*num source
        // do binary search
        double goal = sourceFraction * errors.length;
        double bMin = .001;
        double bMax = .999;
        double b;
        double sourceSum = 0;
        while (bMax - bMin > .001) {
            b = (bMax + bMin) / 2;
            double sum = 0;
            for (int j = 0; j < numSourceInstances; j++) {
                Instance instance = trainData.instance(j);
                sum += Math.pow(b, errors[j]) * instance.weight();
            }
            if (sum > goal)
                bMax = b;
            else
                bMin = b;
        }
        b = (bMax + bMin) / 2;
        // System.out.println(b);
        for (int j = 0; j < numSourceInstances; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * Math.pow(bMin, errors[j]));
            sourceSum += instance.weight();
        }

        // now adjust target weights
        goal = errors.length - sourceSum;
        double m = goal / initialTWeightSum;
        for (int j = numSourceInstances; j < errors.length; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * m);
        }
    } else { // final
        if (!doUpsource) { // modify only target weights
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = initialTWeightSum / tWeightSum;
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        } else { // modify all weights
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = errors.length / tWeightSum;
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        }
    }
    return beta;
}
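This method is the AdaBoost.R2-style update that the code's own comments allude to: errors are absolute prediction errors normalized by the largest target error, and beta = loss / (1 - loss). In the non-final branch, the binary search finds a decay base b such that the source weights, each multiplied by b^error, sum to sourceFraction * errors.length (the total weight is kept at one unit per instance); the factor m then rescales the target weights so the grand total returns to errors.length.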
From source file:boostingPL.boosting.AdaBoost.java
License:Open Source License
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    //classifiers[t] = ClassifiersHelper.newInstance("C4.5");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    if (e >= 0.5) {
        System.out.println("AdaBoost Error: error rate = " + e + ", >= 0.5");
        throw new Exception("error rate > 0.5");
    }
    if (e == 0.0) {
        e = 0.0001; // don't let e == 0
    }

    cweights[t] = 0.5 * Math.log((1 - e) / e) / Math.log(Math.E);
    System.out.println("Round = " + t + "\t ErrorRate = " + e + "\t\t Weights = " + cweights[t]);

    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() / (2 * e));
        } else {
            inst.setWeight(inst.weight() / (2 * (1 - e)));
        }
    }
}
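This is the classic two-class AdaBoost reweighting arranged so that the total weight stays constant with no renormalization pass: the misclassified mass e is scaled by 1/(2e) to 1/2, and the correctly classified mass 1 - e is scaled by 1/(2(1 - e)) to the other 1/2. (The trailing division by Math.log(Math.E) is a no-op, since ln e = 1.)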
From source file:boostingPL.boosting.SAMME.java
License:Open Source License
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    final int numClasses = insts.classAttribute().numValues();
    double maxe = 1 - 1.0 / numClasses;
    if (e >= maxe) {
        System.out.println("SAMME Error: error rate = " + e + ", >= " + maxe);
        throw new Exception("error rate > " + maxe);
    }
    if (e == 0.0) {
        e = 0.0001; // don't let e == 0
    }

    cweights[t] = Math.log((1 - e) / e) + Math.log(numClasses - 1);
    System.out.println("Round = " + t + "\tErrorRate = " + e + "\tCWeight = " + cweights[t]);

    double expCWeight = Math.exp(cweights[t]);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() * expCWeight);
        }
    }

    double weightSum = insts.sumOfWeights();
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        inst.setWeight(inst.weight() / weightSum);
    }
}
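SAMME generalizes AdaBoost to K classes: the classifier weight gains a log(numClasses - 1) term, and the weak learner only has to beat random guessing (error below 1 - 1/K) rather than 0.5. For example, with K = 3 and e = 0.25, cweights[t] = ln(0.75/0.25) + ln 2, which is about 1.10 + 0.69 = 1.79. Unlike the previous example, only misclassified instances are scaled (by exp(cweights[t])), and a second pass divides every weight by sumOfWeights() so the weights sum to one.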
From source file:distributedRedditAnalyser.OzaBoost.java
License:Open Source License
@Override
public void trainOnInstanceImpl(Instance inst) {
    try {
        lock.acquire();

        // Get a new classifier
        Classifier newClassifier = ((Classifier) getPreparedClassOption(this.baseLearnerOption)).copy();
        ensemble.add(new ClassifierInstance(newClassifier));

        // If we have too many classifiers
        while (ensemble.size() > ensembleSizeOption.getValue())
            ensemble.pollFirst();

        double lambda_d = 1.0;
        for (ClassifierInstance c : ensemble) {
            double k = this.pureBoostOption.isSet() ? lambda_d
                    : MiscUtils.poisson(lambda_d, this.classifierRandom);
            if (k > 0.0) {
                Instance weightedInst = (Instance) inst.copy();
                weightedInst.setWeight(inst.weight() * k);
                c.getClassifier().trainOnInstance(weightedInst);
            }
            if (c.getClassifier().correctlyClassifies(inst)) {
                c.setScms(c.getScms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getScms());
            } else {
                c.setSwms(c.getSwms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getSwms());
            }
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    } finally {
        lock.release();
    }
}
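OzaBoost approximates bootstrap resampling on a stream by drawing k ~ Poisson(lambda) and training each base learner on a copy of the instance weighted by inst.weight() * k. MiscUtils.poisson here comes from MOA; if you want that behavior without the dependency, a plain inverse-transform sketch looks like this (an illustration, not MOA's exact implementation):

import java.util.Random;

public class PoissonSketch {
    // Inverse-transform sampling of k ~ Poisson(lambda): walk the CDF until it
    // passes a uniform draw. Fine for the small lambda values used in online
    // bagging/boosting; exp(-lambda) underflows for very large lambda.
    static int poisson(double lambda, Random r) {
        double p = Math.exp(-lambda); // P(k = 0)
        double cdf = p;
        double u = r.nextDouble();
        int k = 0;
        while (u > cdf) {
            k++;
            p *= lambda / k; // P(k) = P(k - 1) * lambda / k
            cdf += p;
        }
        return k;
    }

    public static void main(String[] args) {
        Random r = new Random(42);
        // A base learner would train on the instance with weight w * k.
        System.out.println(poisson(1.0, r));
    }
}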
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/**
 * Converts a feature set object to a weka Instances object.
 * <p/>
 * Uses weka's instance weighting capability to assign weights for each data point.
 *
 * @param feature_set the feature set to convert
 * @param fn          a weight function
 * @return a weka instances object
 */
public static Instances convertFeatureSetToWeightedWekaInstances(FeatureSet feature_set, WeightFunction fn) {
    ArrayList<Attribute> attributes = generateWekaAttributes(feature_set.getFeatures());
    Instances instances = new Instances("AuToBI_feature_set", attributes, feature_set.getDataPoints().size());
    for (Word w : feature_set.getDataPoints()) {
        Instance inst = ClassifierUtils.assignWekaAttributes(instances, w);
        inst.setWeight(fn.weight(w));
        instances.add(inst);
    }
    ClassifierUtils.setWekaClassAttribute(instances, feature_set.getClassAttribute());
    return instances;
}
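Note the ordering here: the weight is set on inst before instances.add(inst). Instances.add stores a copy of the instance, so a setWeight call made after add would not affect the stored copy; you would have to reach it through instances.instance(i) instead.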
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Sets the weights for the next iteration.
 *
 * @param training the training instances
 * @param reweight the reweighting factor
 * @throws Exception if something goes wrong
 */
protected void setWeights(Instances training, double reweight) throws Exception {
    double oldSumOfWeights, newSumOfWeights;

    oldSumOfWeights = training.sumOfWeights();
    Enumeration enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        if (!Utils.eq(m_Classifiers[m_NumIterationsPerformed].classifyInstance(instance),
                instance.classValue()))
            instance.setWeight(instance.weight() * reweight);
    }

    // Renormalize weights
    newSumOfWeights = training.sumOfWeights();
    enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        instance.setWeight(instance.weight() * oldSumOfWeights / newSumOfWeights);
    }
}
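The two passes conserve the total weight: the first scales misclassified instances by reweight, which changes the sum, and the second multiplies every weight by oldSumOfWeights / newSumOfWeights to restore it. For example, if the sum starts at 100 and the first pass raises it to 125, every weight is then scaled by 100/125 = 0.8.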
From source file:j48.GraftSplit.java
License:Open Source License
/**
 * builds m_graftdistro using the passed data
 *
 * @param data the instances to use when creating the distribution
 */
public void buildClassifier(Instances data) throws Exception {
    // distribution for the graft, not counting cases in atbop, only orig leaf
    m_graftdistro = new Distribution(2, data.numClasses());

    // which subset are we looking at for the graft?
    int subset = subsetOfInterest(); // this is the subset for m_leaf
    double thisNodeCount = 0;
    double knownCases = 0;
    boolean allKnown = true;

    // populate distribution
    for (int x = 0; x < data.numInstances(); x++) {
        Instance instance = data.instance(x);
        if (instance.isMissing(m_attIndex)) {
            allKnown = false;
            continue;
        }
        knownCases += instance.weight();
        int subst = whichSubset(instance);
        if (subst == -1)
            continue;
        m_graftdistro.add(subst, instance);
        if (subst == subset) { // instance belongs at m_leaf
            thisNodeCount += instance.weight();
        }
    }

    double factor = (knownCases == 0) ? (1.0 / 2.0) : (thisNodeCount / knownCases);
    if (!allKnown) {
        for (int x = 0; x < data.numInstances(); x++) {
            if (data.instance(x).isMissing(m_attIndex)) {
                Instance instance = data.instance(x);
                int subst = whichSubset(instance);
                if (subst == -1)
                    continue;
                instance.setWeight(instance.weight() * factor);
                m_graftdistro.add(subst, instance);
            }
        }
    }

    // if there are no cases at the leaf, make sure the desired
    // class is chosen, by setting counts to 0.01
    if (m_graftdistro.perBag(subset) == 0) {
        double[] counts = new double[data.numClasses()];
        counts[m_maxClass] = 0.01;
        m_graftdistro.add(subset, counts);
    }
    if (m_graftdistro.perBag((subset == 0) ? 1 : 0) == 0) {
        double[] counts = new double[data.numClasses()];
        counts[(int) m_otherLeafMaxClass] = 0.01;
        m_graftdistro.add((subset == 0) ? 1 : 0, counts);
    }
}
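Here setWeight implements C4.5-style fractional instances for missing values: an instance whose split attribute is missing is not dropped, but is added to the distribution with its weight scaled by factor, the proportion of known-value weight that reached the subset of interest (falling back to 1/2 when no values are known at all).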
From source file:j48.NBTreeSplit.java
License:Open Source License
/**
 * Creates split on enumerated attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleEnumeratedAttribute(Instances trainInstances) throws Exception {
    m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
    m_c45S.buildClassifier(trainInstances);
    if (m_c45S.numSubsets() == 0) {
        return;
    }
    m_errors = 0;
    Instance instance;

    Instances[] trainingSets = new Instances[m_complexityIndex];
    for (int i = 0; i < m_complexityIndex; i++) {
        trainingSets[i] = new Instances(trainInstances, 0);
    }
    /*
     * m_distribution = new Distribution(m_complexityIndex,
     *     trainInstances.numClasses());
     */
    int subset;
    for (int i = 0; i < trainInstances.numInstances(); i++) {
        instance = trainInstances.instance(i);
        subset = m_c45S.whichSubset(instance);
        if (subset > -1) {
            trainingSets[subset].add((Instance) instance.copy());
        } else {
            double[] weights = m_c45S.weights(instance);
            for (int j = 0; j < m_complexityIndex; j++) {
                try {
                    Instance temp = (Instance) instance.copy();
                    if (weights.length == m_complexityIndex) {
                        temp.setWeight(temp.weight() * weights[j]);
                    } else {
                        temp.setWeight(temp.weight() / m_complexityIndex);
                    }
                    trainingSets[j].add(temp);
                } catch (Exception ex) {
                    ex.printStackTrace();
                    System.err.println("*** " + m_complexityIndex);
                    System.err.println(weights.length);
                    System.exit(1);
                }
            }
        }
    }

    /*
     * // compute weights (weights of instances per subset)
     * m_weights = new double[m_complexityIndex];
     * for (int i = 0; i < m_complexityIndex; i++) {
     *     m_weights[i] = trainingSets[i].sumOfWeights();
     * }
     * Utils.normalize(m_weights);
     */

    /*
     * // Only Instances with known values are relevant.
     * Enumeration enu = trainInstances.enumerateInstances();
     * while (enu.hasMoreElements()) {
     *     instance = (Instance) enu.nextElement();
     *     if (!instance.isMissing(m_attIndex)) {
     *         // m_distribution.add((int) instance.value(m_attIndex), instance);
     *         trainingSets[(int) instances.value(m_attIndex)].add(instance);
     *     } else {
     *         // add these to the error count
     *         m_errors += instance.weight();
     *     }
     * }
     */

    Random r = new Random(1);
    int minNumCount = 0;
    for (int i = 0; i < m_complexityIndex; i++) {
        if (trainingSets[i].numInstances() >= 5) {
            minNumCount++;
            // Discretize the sets
            Discretize disc = new Discretize();
            disc.setInputFormat(trainingSets[i]);
            trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

            trainingSets[i].randomize(r);
            trainingSets[i].stratify(5);
            NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
            fullModel.buildClassifier(trainingSets[i]);

            // add the errors for this branch of the split
            m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
        } else {
            // if fewer than min obj then just count them as errors
            for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                m_errors += trainingSets[i].instance(j).weight();
            }
        }
    }

    // Check if there are at least five instances in at least two of the subsets.
    if (minNumCount > 1) {
        m_numSubsets = m_complexityIndex;
    }
}
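When whichSubset returns -1, the split attribute value is missing, so the instance is copied into every branch with its weight multiplied by weights[j], that branch's share of the training weight (or split evenly when the weights array does not match the number of branches); each branch's weighted copies then count toward its cross-validated Naive Bayes error. handleNumericAttribute below follows the same pattern.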
From source file:j48.NBTreeSplit.java
License:Open Source License
/**
 * Creates split on numeric attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleNumericAttribute(Instances trainInstances) throws Exception {
    m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights);
    m_c45S.buildClassifier(trainInstances);
    if (m_c45S.numSubsets() == 0) {
        return;
    }
    m_errors = 0;

    Instances[] trainingSets = new Instances[m_complexityIndex];
    trainingSets[0] = new Instances(trainInstances, 0);
    trainingSets[1] = new Instances(trainInstances, 0);
    int subset = -1;

    // populate the subsets
    for (int i = 0; i < trainInstances.numInstances(); i++) {
        Instance instance = trainInstances.instance(i);
        subset = m_c45S.whichSubset(instance);
        if (subset != -1) {
            trainingSets[subset].add((Instance) instance.copy());
        } else {
            double[] weights = m_c45S.weights(instance);
            for (int j = 0; j < m_complexityIndex; j++) {
                Instance temp = (Instance) instance.copy();
                if (weights.length == m_complexityIndex) {
                    temp.setWeight(temp.weight() * weights[j]);
                } else {
                    temp.setWeight(temp.weight() / m_complexityIndex);
                }
                trainingSets[j].add(temp);
            }
        }
    }

    /*
     * // compute weights (weights of instances per subset)
     * m_weights = new double[m_complexityIndex];
     * for (int i = 0; i < m_complexityIndex; i++) {
     *     m_weights[i] = trainingSets[i].sumOfWeights();
     * }
     * Utils.normalize(m_weights);
     */

    Random r = new Random(1);
    int minNumCount = 0;
    for (int i = 0; i < m_complexityIndex; i++) {
        if (trainingSets[i].numInstances() > 5) {
            minNumCount++;
            // Discretize the sets
            Discretize disc = new Discretize();
            disc.setInputFormat(trainingSets[i]);
            trainingSets[i] = Filter.useFilter(trainingSets[i], disc);

            trainingSets[i].randomize(r);
            trainingSets[i].stratify(5);
            NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable();
            fullModel.buildClassifier(trainingSets[i]);

            // add the errors for this branch of the split
            m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r);
        } else {
            for (int j = 0; j < trainingSets[i].numInstances(); j++) {
                m_errors += trainingSets[i].instance(j).weight();
            }
        }
    }

    // Check if minimum number of Instances in at least two subsets.
    if (minNumCount > 1) {
        m_numSubsets = m_complexityIndex;
    }
}