List of usage examples for weka.core.Instance.setWeight
public void setWeight(double weight);
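Before the mined examples below, a minimal self-contained sketch of the call (hypothetical dataset and attribute names, assuming the Weka 3.7+ API):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetWeightDemo {
    public static void main(String[] args) {
        // Build a tiny single-attribute dataset.
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        Instances data = new Instances("demo", attrs, 1);

        // The DenseInstance constructor's first argument is the initial weight.
        Instance inst = new DenseInstance(1.0, new double[] { 42.0 });
        inst.setDataset(data);
        data.add(inst);

        // Re-weight the instance, e.g. to emphasize it during training.
        // Note: Instances.add() stores a copy, so set the weight on the
        // stored reference, not on the local variable.
        data.instance(0).setWeight(2.5);
        System.out.println(data.instance(0).weight()); // prints 2.5
    }
}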
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * @return training accuracy
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 10;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);
    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Riffle thatCluster : this.clusters) {
            double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
            for (Instance x : thatCluster.getHeader()) {
                Instance pseudoPt = makePerceptronInstance(thisCluster, x);
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    this.outlierPerceptronTrainingSet.parallelStream().forEach((x) -> {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    });
    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) {
                    // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs
        // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // Adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);
        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons
    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}
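Note: the boosting loop above is the classic AdaBoost re-weighting scheme expressed through setWeight(). With epsilon the summed weight of misclassified instances, each correctly classified instance has its weight multiplied by epsilon / (1 - epsilon), NaN results are floored at weka.core.Utils.SMALL, and all weights are then renormalized to sum to one; the learner's ensemble weight is log((1 - epsilon) / epsilon).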
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * @param c cluster that is being compared against
 * @param x real data instance
 * @return DenseInstance made to work with the outlier-detecting perceptron
 */
private Instance makePerceptronInstance(Riffle c, Instance x) {
    Instance pseudoPoint = new DenseInstance(this.outlierPerceptronTrainingSet.numAttributes());
    pseudoPoint.setDataset(outlierPerceptronTrainingSet);
    double p = c.getInclusionProbability(x);
    double r = (c.getRadius() != 0) ? c.getRadius() : 1;
    //double w = c.getWeight();
    double N = Math.min(c.size(), this.cacheSizeOption.getValue());
    double d = c.getCenterDistance(x);
    double logP = (p == 0) ? 0 : Math.log(p);
    double logDR = (r == 0 || (d / r) == 0) ? 0 : Math.log(d / r);
    pseudoPoint.setValue(0, logP);
    pseudoPoint.setValue(1, logDR);
    pseudoPoint.setValue(2, logDR * logP);
    pseudoPoint.setValue(3,
            logP - Math.log(1.0 / Math.pow(2.0 * N, this.universalCluster.getHeader().numAttributes())));
    pseudoPoint.setClassValue(0);
    pseudoPoint.setWeight(0.0);
    return pseudoPoint;
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * @return training accuracy
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 1;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);
    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Instance x : thisCluster.getHeader()) {
            Instance pseudoPt = makePerceptronInstance(thisCluster, x);
            for (Riffle thatCluster : this.clusters) {
                double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    for (Instance x : this.outlierPerceptronTrainingSet) {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    }
    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) {
                    // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs
        // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // Adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);
        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons
    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * @return instances retrieved from stream
 */
private Instances getChunk() {
    Instances chunk = new Instances(stream.getHeader(), this.chunkSizeOption.getValue());
    // Add "chunk size" number of instances to test directly from the stream (first time we see each instance):
    while (stream.hasMoreInstances() && chunk.numInstances() < this.chunkSizeOption.getValue()) {
        Instance inst = stream.nextInstance();
        this.instancesProcessed++;
        chunk.add(inst);
        if (this.inWarmupPhase) {
            // For warmup phase, use full and immediate training
            inst.setWeight(1.0);
            latentTrainingInstQueue.addFirst(new TimeBoxedInstance(inst, this.instancesProcessed, 0, null));
        } else if (rng.nextFloat() > this.trainingFractionOption.getValue()) {
            // Select a portion for the latent training set by setting non-training instance weight to zero.
            // Place at the beginning of the queue/list and record the intended activation 'time' for
            // immediate unsupervised 'training'.
            inst.setWeight(0.0);
            latentTrainingInstQueue.addFirst(new TimeBoxedInstance(inst, this.instancesProcessed, 0, null));
        } else {
            if (this.sendZeroWeightsOption.isSet()) {
                Instance unsupervisedInstance = (Instance) inst.copy();
                unsupervisedInstance.setWeight(0.0);
                //unsupervisedInstance.setClassValue(0);
                latentTrainingInstQueue.addFirst(
                        new TimeBoxedInstance(unsupervisedInstance, this.instancesProcessed, 0, null));
            }
            // Place at the end of the queue/list and record the intended activation 'time' for latent
            // supervised training.
            latentTrainingInstQueue.addLast(new TimeBoxedInstance(inst, this.instancesProcessed,
                    this.trainingTimeDelayOption.getValue(), null));
        }
        // MOA framework housekeeping and reporting...
        if ((instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES) == 0) {
            this.monitor.setCurrentActivityDescription("Updating Metrics");
            if (monitor.taskShouldAbort()) {
                chunk.clear();
                return chunk;
            }
            long estimatedRemainingInstances = stream.estimatedRemainingInstances();
            if (this.instanceLimitOption.getValue() > 0) {
                long maxRemaining = this.instanceLimitOption.getValue() - instancesProcessed;
                if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                    estimatedRemainingInstances = maxRemaining;
                }
            }
            monitor.setCurrentActivityFractionComplete(
                    (double) instancesProcessed / (double) (instancesProcessed + estimatedRemainingInstances));
        }
    } // end while
    return chunk;
}
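Note: in this task the weight doubles as a supervision flag rather than an importance factor — setWeight(1.0) marks warmup instances for full, immediate training, while setWeight(0.0) marks instances (or copies of them) to be passed downstream as unlabeled data.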
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * @param testInstances instance set to evaluate accuracy
 * @return number of instances actually tested
 */
private int test(Instances testInstances) {
    this.monitor.setCurrentActivityDescription("Testing Instances");
    int ret = testInstances.size();
    int novelClassLabel = testInstances.numClasses();
    int outlierLabel = novelClassLabel + 1;
    // For latent label outliers that have reached their deadline, we must now make a decision:
    while (!this.pendingFinalLabelInstQueue.isEmpty()
            && this.pendingFinalLabelInstQueue.peek().deadline <= this.instancesProcessed) {
        TimeBoxedInstance ti = this.pendingFinalLabelInstQueue.pop();
        int y = (int) ti.inst.classValue();
        double[] prediction = null;
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            Instance novelInst = (Instance) ti.inst.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            novelInst.setWeight(NOVEL_WEIGHT);
            prediction = learner.getVotesForInstance(novelInst);
            evaluator.addResult(novelInst, prediction); // Outlier out of time. Remove it
        } else {
            prediction = learner.getVotesForInstance(ti.inst);
            evaluator.addResult(ti.inst, prediction); // Outlier out of time. Remove it
        }
        this.cm.add(weka.core.Utils.maxIndex(prediction), ti.inst.classValue());
    }
    // Run accuracy test for current instance(s)
    for (Instance i : testInstances) {
        int y = (int) i.classValue();
        double[] prediction = null;
        Instance instToActuallyPredict = i;
        // If novel, make a special instance
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            instToActuallyPredict = (Instance) i.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            // WARNING - this crashes other algorithms if not also done on training!
            instToActuallyPredict.setWeight(NOVEL_WEIGHT);
        }
        prediction = learner.getVotesForInstance(instToActuallyPredict);
        if ((prediction.length > outlierLabel) && (prediction[outlierLabel] > (1.0 / prediction.length))) {
            // Delay accuracy metrics until stale time
            this.pendingFinalLabelInstQueue.add(new TimeBoxedInstance(i, this.instancesProcessed,
                    this.labelDeadlineOption.getValue(), prediction));
        } else {
            // Not an outlier, so treat it like normal
            evaluator.addResult(instToActuallyPredict, prediction);
            this.cm.add(weka.core.Utils.maxIndex(prediction), i.classValue());
        }
    } // end for
    assert this.pendingFinalLabelInstQueue.size() < (this.labelDeadlineOption.getValue() + 1)
            : "Cache 'pendingFinalLabelInstQueue' is larger than designed.";
    return ret;
}
From source file:mulan.transformations.multiclass.CopyWeight.java
License:Open Source License
/**
 * Transforms a multi-label instance to a list of single-label instances,
 * one for each of the labels that annotate the instance, by copying the
 * feature vector and attaching a weight equal to 1/(list size).
 *
 * @param instance a multi-label instance
 * @return a list with the transformed single-label instances
 */
@Override
List<Instance> transformInstance(Instance instance) {
    List<Instance> copy = super.transformInstance(instance);
    for (Instance anInstance : copy) {
        anInstance.setWeight(1.0 / copy.size());
    }
    return copy;
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.WekaMIContourDataClassifier.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void buildClassifier(ContourDataGrid cData, VectorDataList bgData) throws Exception {
    // transform input data to weka mi-instances
    m_data = initDataset(cData.numFeatures(), 2, cData.totalLength() + bgData.numVectors(), cData.width());
    for (int r = 0; r < cData.totalLength(); r++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        for (int c = 0; c < cData.width(); c++) {
            int vecIdx = cData.getVectorIdx(c, r);
            Instance inst = new DenseInstance(cData.weight(vecIdx), cData.getVector(vecIdx));
            inst.setDataset(bagData);
            bagData.add(inst);
        }
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, r); // bag id
        newBag.setValue(2, 1); // class attribute
        newBag.setValue(1, value);
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }
    for (int i = 0; i < bgData.numVectors(); i++) {
        Instances bagData = new Instances(m_data.attribute(1).relation(), cData.width());
        Instance inst = new DenseInstance(bgData.weight(i), bgData.getVector(i));
        inst.setDataset(bagData);
        bagData.add(inst);
        int value = m_data.attribute(1).addRelation(bagData);
        Instance newBag = new DenseInstance(3);
        newBag.setValue(0, cData.totalLength() + i);
        newBag.setValue(2, 0);
        newBag.setValue(1, value);
        newBag.setWeight(1);
        newBag.setDataset(m_data);
        m_data.add(newBag);
    }
    m_classifier.buildClassifier(m_data);
}
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Splits instances into subsets based on the given split.
 *
 * @param data the data to work with
 * @return the subsets of instances
 * @throws Exception if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {
    // Allocate array of Instances objects
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }
    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            double[] testPoint = new double[2];
            int ctr = 0;
            for (int k = 0; k < data.numInstances(); k++) {
                ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(data.instance(k));
            }
        } else {
            Classifier fc;
            double predictedClass;
            // Go through the data
            for (int i = 0; i < data.numInstances(); i++) {
                // Get instance
                Instance inst = data.instance(i);
                String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
                fc = listOfFc.get(classifierId);
                predictedClass = fc.classifyInstance(inst);
                if (predictedClass != Instance.missingValue()) {
                    subsets[(int) predictedClass].add(inst);
                    continue;
                }
                // Else throw an exception
                throw new IllegalArgumentException("Unknown attribute type");
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {
            // Get instance
            Instance inst = data.instance(i);
            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {
                // Split instance up
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }
                // Proceed to next instance
                continue;
            }
            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);
                // Proceed to next instance
                continue;
            }
            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);
                // Proceed to next instance
                continue;
            }
            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }
    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }
    // Return the subsets
    return subsets;
}
From source file:org.wikipedia.miner.annotation.Disambiguator.java
License:Open Source License
@SuppressWarnings("unchecked") private void weightTrainingInstances() { double positiveInstances = 0; double negativeInstances = 0; Enumeration<Instance> e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = (Instance) e.nextElement(); double isValidSense = i.value(3); if (isValidSense == 0) positiveInstances++;/*from w w w. j a v a 2 s . co m*/ else negativeInstances++; } double p = (double) positiveInstances / (positiveInstances + negativeInstances); e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = (Instance) e.nextElement(); double isValidSense = i.value(3); if (isValidSense == 0) i.setWeight(0.5 * (1.0 / p)); else i.setWeight(0.5 * (1.0 / (1 - p))); } }
From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java
License:Open Source License
@SuppressWarnings("unchecked") private void weightTrainingInstances() { double positiveInstances = 0; double negativeInstances = 0; Enumeration<Instance> e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = e.nextElement(); double isValidSense = i.value(attributes.size() - 1); if (isValidSense == 0) positiveInstances++;/*from www .j a v a 2 s .c o m*/ else negativeInstances++; } double p = (double) positiveInstances / (positiveInstances + negativeInstances); System.out.println("stats: positive=" + p + ", negative=" + (1 - p)); e = trainingData.enumerateInstances(); while (e.hasMoreElements()) { Instance i = e.nextElement(); double isLinked = i.value(attributes.size() - 1); if (isLinked == 0) i.setWeight(0.5 * (1.0 / p)); else i.setWeight(0.5 * (1.0 / (1 - p))); } }