List of usage examples for weka.core Instances size
@Override public int size()
From source file:meddle.PredictByDomainOS.java
License:Open Source License
private static boolean predictOneFlow(String line, String domainOS) { if (!domainOSModel.containsKey(domainOS)) return false; else {// w w w .j a v a 2 s. c om try { Classifier classifier = domainOSModel.get(domainOS); Map<String, Integer> fi = domainOSFeature.get(domainOS); Instances structure = domainOSStruct.get(domainOS); Instance current = getInstance(line, fi, fi.size()); Instances is = new Instances(structure); is.setClassIndex(is.numAttributes() - 1); is.add(current); current = is.get(is.size() - 1); current.setClassMissing(); double predicted = classifier.classifyInstance(current); if (predicted > 0) { return true; } else return false; } catch (Exception e) { e.printStackTrace(); } } return false; }
From source file:mlda.util.Utils.java
License:Open Source License
/** * Get array of ImbalancedFeature with labels frequency * /* w w w . java2 s . c o m*/ * @param dataset Multi-label dataset * @return Array of ImbalancedFeature with the labels frequency */ public static ImbalancedFeature[] getAppearancesPerLabel(MultiLabelInstances dataset) { int[] labelIndices = dataset.getLabelIndices(); ImbalancedFeature[] labels = new ImbalancedFeature[labelIndices.length]; Instances instances = dataset.getDataSet(); int appearances = 0; Attribute currentAtt; for (int i = 0; i < labelIndices.length; i++) { currentAtt = instances.attribute(labelIndices[i]); appearances = 0; for (int j = 0; j < instances.size(); j++) { if (instances.instance(j).value(currentAtt) == 1.0) { appearances++; } } labels[i] = new ImbalancedFeature(currentAtt.name(), appearances); } return labels; }
From source file:mlda.util.Utils.java
License:Open Source License
/** * Calculate IRs of the ImbalancedFeatures * /*from ww w . j av a 2s . c o m*/ * @param dataset Multi-label dataset * @param labels Labels of the dataset as ImbalancedFeature objects * @return Array of ImbalancedFeature objects with calculated IR */ public static ImbalancedFeature[] getImbalancedWithIR(MultiLabelInstances dataset, ImbalancedFeature[] labels) { int[] labelIndices = dataset.getLabelIndices(); ImbalancedFeature[] labels_imbalanced = new ImbalancedFeature[labelIndices.length]; Instances instances = dataset.getDataSet(); int nOnes = 0, nZeros = 0, maxAppearance = 0; double IRIntraClass; double variance; double IRInterClass; double mean = dataset.getNumInstances() / 2; Attribute current; ImbalancedFeature currentLabel; for (int i = 0; i < labelIndices.length; i++) //for each label { nZeros = 0; nOnes = 0; current = instances.attribute(labelIndices[i]); //current label for (int j = 0; j < instances.size(); j++) //for each instance { if (instances.instance(j).value(current) == 1.0) { nOnes++; } else { nZeros++; } } try { if (nZeros == 0 || nOnes == 0) { IRIntraClass = 0; } else if (nZeros > nOnes) { IRIntraClass = (double) nZeros / nOnes; } else { IRIntraClass = (double) nOnes / nZeros; } } catch (Exception e1) { IRIntraClass = 0; } variance = (Math.pow((nZeros - mean), 2) + Math.pow((nOnes - mean), 2)) / 2; currentLabel = getLabelByName(current.name(), labels); maxAppearance = labels[0].getAppearances(); if (currentLabel.getAppearances() <= 0) { IRInterClass = Double.NaN; } else { IRInterClass = (double) maxAppearance / currentLabel.getAppearances(); } labels_imbalanced[i] = new ImbalancedFeature(current.name(), currentLabel.getAppearances(), IRInterClass, IRIntraClass, variance); } return labels_imbalanced; }
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/** * Use inclusion probability to discover the cluster "nearest" the provided instance * * @param D instance set to sort from/* w w w . j a v a2 s . c om*/ * @param x instance in question * @return sorted set of clusters, ordered by inc */ protected final NearestInstanceTuple[] findNearestNeighbors(Instances D, Instance x) { NearestInstanceTuple[] ret = new NearestInstanceTuple[D.size()]; double[] xVals = x.toDoubleArray(); int idx = 0; for (Instance n : D) { ret[idx++] = new NearestInstanceTuple(n, VectorDistances.distance(xVals, n.toDoubleArray(), D, this.distanceStrategyOption.getChosenIndex())); } // end for Arrays.parallelSort(ret); return ret; }
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * Evaluates the learner on a batch of test instances, first resolving any latent
 * outliers whose label deadline has passed, then scoring the current batch.
 * Instances the learner flags as outliers are queued (time-boxed) instead of being
 * scored immediately; their accuracy contribution is delayed until their deadline.
 *
 * @param testInstances instance set to evaluate accuracy
 * @return number of instances actually tested (the size of the incoming batch)
 */
private int test(Instances testInstances) {
    this.monitor.setCurrentActivityDescription("Testing Instances");
    int ret = testInstances.size();
    // Labels are 0..numClasses-1; the "novel" vote slot is at numClasses and the
    // "outlier" vote slot one past that — NOTE(review): inferred from how
    // prediction[outlierLabel] is read below; confirm against the learner's vote layout.
    int novelClassLabel = testInstances.numClasses();
    int outlierLabel = novelClassLabel + 1;
    // For latent label outliers that have reached their deadline, we must now make a decision:
    while (!this.pendingFinalLabelInstQueue.isEmpty()
            && this.pendingFinalLabelInstQueue.peek().deadline <= this.instancesProcessed) {
        TimeBoxedInstance ti = this.pendingFinalLabelInstQueue.pop();
        int y = (int) ti.inst.classValue();
        double[] prediction = null;
        // True class still (nearly) unseen in training => treat as a novel-class instance.
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            Instance novelInst = (Instance) ti.inst.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            novelInst.setWeight(NOVEL_WEIGHT);
            prediction = learner.getVotesForInstance(novelInst);
            evaluator.addResult(novelInst, prediction); // Outlier out of time. Remove it
        } else {
            prediction = learner.getVotesForInstance(ti.inst);
            evaluator.addResult(ti.inst, prediction); // Outlier out of time. Remove it
        }
        // Record the forced decision in the confusion matrix.
        this.cm.add(weka.core.Utils.maxIndex(prediction), ti.inst.classValue());
    }
    // Run accuracy test for current instance(s)
    for (Instance i : testInstances) {
        int y = (int) i.classValue();
        double[] prediction = null;
        Instance instToActuallyPredict = i;
        // If novel, make a special instance
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            instToActuallyPredict = (Instance) i.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            // WARNING - this crashes other algorithms if not also done on training!
            instToActuallyPredict.setWeight(NOVEL_WEIGHT);
        }
        prediction = learner.getVotesForInstance(instToActuallyPredict);
        // Outlier vote above the uniform-vote threshold => defer the decision.
        if ((prediction.length > outlierLabel) && (prediction[outlierLabel] > (1.0 / prediction.length))) {
            this.pendingFinalLabelInstQueue.add(new TimeBoxedInstance(i, this.instancesProcessed,
                    this.labelDeadlineOption.getValue(), prediction)); // Delay accuracy metrics until stale time
        } else {
            evaluator.addResult(instToActuallyPredict, prediction); // Not an outlier, so treat it like normal
            this.cm.add(weka.core.Utils.maxIndex(prediction), i.classValue());
        }
    } // end for
    assert this.pendingFinalLabelInstQueue.size() < (this.labelDeadlineOption.getValue()
            + 1) : "Cache 'pendingFinalLabelInstQueue' is larger than designed.";
    return ret;
}
From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java
/**
 * Update phase: measures base-classifier performance on the new batch, adapts the
 * number of retained batches (via ADWIN width and, optionally, an accuracy-variance
 * heuristic), merges the batch into the instance store, relearns the error-region
 * decider on the redefined (error-space) problem, and rebuilds the region patches.
 */
public void updateClassifier(Instances data) {
    System.out.println("########## UPDATE PHASE ############");
    this.updates++;
    // Compute performance and feed ADWIN. (translated from German)
    System.out.println("Data size: " + data.size());
    System.out.println("Determine Performance...");
    determinePerformance(data, baseClassifier);
    accuracy_array.add((double) counter_one / batchSize.getValue());
    // On drift, restart the accuracy history.
    if (change) {
        accuracy_array.clear();
    }
    if (!adwinOnly.isSet()) {
        // Keep the previous non-zero variance for the stability comparison below.
        if (variance != 0) {
            old_variance = variance;
        }
        variance = 0;
        average = 0;
        // Recompute mean and variance of the per-batch accuracy history.
        if (accuracy_array.size() > 2) {
            for (int i = 0; i < accuracy_array.size(); i++) {
                average += accuracy_array.get(i);
            }
            average = average / accuracy_array.size();
            for (int i = 0; i < accuracy_array.size(); i++) {
                variance += Math.pow(average - accuracy_array.get(i), 2);
            }
            variance = variance / accuracy_array.size();
        }
        System.out.println("Varianz: " + variance);
        System.out.println("epsilon: " + real_variance_epsilon);
        if (change) {
            maxBatchesToKeep = batchesToKeep.getValue();
        }
    }
    if ((this.ADError.getWidth() / batchSize.getValue()) < 1) {
        // ADWIN window smaller than one batch: keep a single batch.
        this.instanceStore.setNumBatches(1);
    } else {
        if (adwinOnly.isSet()) {
            this.instanceStore.setNumBatches(this.ADError.getWidth() / batchSize.getValue());
        } else {
            // Accuracy variance is stable: freeze the store size instead of growing it.
            if (Math.abs((double) variance - old_variance) < (double) real_variance_epsilon) {
                maxBatchesToKeep = this.instanceStore.numBatches;
                System.out.println("Batchsize wird nicht erhht wegen Varianz");
            }
            this.instanceStore
                    .setNumBatches(Math.min(maxBatchesToKeep, this.ADError.getWidth() / batchSize.getValue()));
            //this.instanceStore.setNumBatches(this.ADError.getWidth() / batchSize.getValue());
        }
    }
    // When the window size is maximal, reset 'change' back to false. (translated from German)
    if (this.instanceStore.numBatches == batchesToKeep.getValue()) {
        //change = false;
    }
    // First: merge the new instances to the "Instance Store"
    this.instanceStore.addInstances(data);
    System.out.println("size ADWIN: " + this.ADError.getWidth());
    System.out.println("size InstanceStore: " + this.instanceStore.numBatches);
    Instances currentStore = this.instanceStore.getInstances();
    System.out.println("Update at Instance: " + this.numInstances + " | Size of Instance store (updates:"
            + this.updates + "): " + currentStore.size());
    // Turn the instances into a binary learning problem to learn the decision space where the original classifier was wrong
    //writeArff("C:\\StAtIC\\experiments\\orig.arff", currentStore);
    //writeArff("C:\\StAtIC\\experiments\\modded.arff", this.reDefinedClasses);
    // System.exit(9525356);
    // Determine the subsets of instances which are covered by the rules (that are not the default rule)
    if (this.useBaseClassAsAttribute.isSet()) {
        currentStore = addBaseClassToInstances(currentStore);
    }
    //if(change) {
    System.out.println("Redefine Problem...");
    this.reDefinedClasses = redefineProblem(currentStore);
    // Now: learn the error regions with a specially adapted or a normal classifier:
    try {
        System.out.println("Build Classifier...");
        this.regionDecider = new DSALearnerWrapper(getDecisionSpaceClassifier());
        regionDecider.buildClassifier(reDefinedClasses);
        // System.out.println("Error Space Classifier:"); System.out.println(regionDecider.toString()); // Todo remove this out
    } catch (Exception e) {
        System.err.println("Error building region decider");
        // NOTE(review): println(e.getStackTrace()) prints the array's toString, not the
        // trace — e.printStackTrace() was probably intended.
        System.err.println(e.getStackTrace());
        System.err.println(e.getMessage());
        System.exit(123452345);
    }
    System.out.println("Determine Subsets...");
    this.subsets = determineSubsets(currentStore, regionDecider);
    // Determine the performance of the BASE classifier for each of those subsets
    //this.basePerfOnSubset = determineBasePerformanceOnSubsets(this.subsets, baseClassifier);
    // Create individual models for the subsets
    this.regionPatches = createPatches(this.subsets, this.basePerfOnSubset);
    System.out.println("Region Decision Subsets: " + subsets.size());
    //}
    // System.exit(18567820);
    System.out.println("##############################\n\n\n");
}
From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java
/** * Learns a specific subset classifier (of the same type as the base * classifier) to improve accuracy on the regions that performed bad before. * * @param subsets//from www. ja v a 2s. co m * @param basePerformance * @return */ private Vector createPatches(Vector subsets, Vector basePerformance) { Vector patches = new Vector(); System.out.println("Creating patches: #" + subsets.size()); try { for (int d = 0; d < subsets.size(); d++) { Instances set = (Instances) subsets.get(d); // if(this.useBaseClassAsAttribute.isSet()) { // writeArff("C:\\StAtIC\\experiments\\set"+d+".arff", set); // } // System.out.println("Set " + d + " size: " + set.size()); Classifier patch; if (set.size() < 5) // Too small to do anything properly { patch = null; // null will then default to base classifier } else { patch = getPatchClassifier(); patch.buildClassifier(set); } patches.add(d, patch); } } catch (Exception e) { System.err.println("Error building patches:"); System.err.println(e.getMessage()); } // System.out.println("\n--- Patches ------------"); // for (int i = 0; i < patches.size(); i++) { // Classifier tmp = (Classifier) patches.get(i); // if (tmp != null) { // System.out.print("Patch " + i+" - "); // System.out.println(tmp); // } // } // System.out.println("------------------------"); // System.exit(45768545); return patches; }
From source file:moa.tud.ke.patching.AdaptivePatchingTwoAdwins.java
/**
 * Update phase (two-ADWIN variant): measures base-classifier performance on the new
 * batch, adapts the number of retained batches from the ADWIN width corrected by the
 * coarse/fine drift flags ('change' / 'changeFine'), merges the batch into the
 * instance store, relearns the error-region decider, and rebuilds the region patches.
 */
public void updateClassifier(Instances data) {
    System.out.println("########## UPDATE PHASE ############");
    this.updates++;
    // Compute performance and feed ADWIN. (translated from German)
    System.out.println("Data size: " + data.size());
    System.out.println("Determine Performance...");
    determinePerformance(data, baseClassifier);
    if ((this.ADError.getWidth() / batchSize.getValue()) < 1) {
        // ADWIN window smaller than one batch: keep a single batch.
        this.instanceStore.setNumBatches(1);
    } else {
        // On coarse drift, re-baseline the correction against the current ADWIN width.
        if (change) {
            corrected_adwin_size = this.ADError.getWidth() / batchSize.getValue() - 1;
        }
        if (!changeFine) {
            // No fine-grained drift: hold the store size and let the correction grow.
            maxBatchesToKeep = this.instanceStore.numBatches;
            corrected_adwin_size++;
        } else {
            maxBatchesToKeep = Math.max(1, this.ADError.getWidth() / batchSize.getValue() - corrected_adwin_size);
        }
        this.instanceStore
                .setNumBatches(Math.min(maxBatchesToKeep, this.ADError.getWidth() / batchSize.getValue()));
        //this.instanceStore.setNumBatches(this.ADError.getWidth() / batchSize.getValue());
    }
    // When the window size is maximal, reset 'change' back to false. (translated from German)
    if (this.instanceStore.numBatches == batchesToKeep.getValue()) {
        //change = false;
    }
    // First: merge the new instances to the "Instance Store"
    this.instanceStore.addInstances(data);
    System.out.println("size ADWIN: " + this.ADError.getWidth());
    System.out.println("size InstanceStore: " + this.instanceStore.numBatches);
    Instances currentStore = this.instanceStore.getInstances();
    System.out.println("Update at Instance: " + this.numInstances + " | Size of Instance store (updates:"
            + this.updates + "): " + currentStore.size());
    // Turn the instances into a binary learning problem to learn the decision space where the original classifier was wrong
    //writeArff("C:\\StAtIC\\experiments\\orig.arff", currentStore);
    //writeArff("C:\\StAtIC\\experiments\\modded.arff", this.reDefinedClasses);
    // System.exit(9525356);
    // Determine the subsets of instances which are covered by the rules (that are not the default rule)
    if (this.useBaseClassAsAttribute.isSet()) {
        currentStore = addBaseClassToInstances(currentStore);
    }
    //if(change) {
    System.out.println("Redefine Problem...");
    this.reDefinedClasses = redefineProblem(currentStore);
    // Now: learn the error regions with a specially adapted or a normal classifier:
    try {
        System.out.println("Build Classifier...");
        this.regionDecider = new DSALearnerWrapper(getDecisionSpaceClassifier());
        regionDecider.buildClassifier(reDefinedClasses);
        // System.out.println("Error Space Classifier:"); System.out.println(regionDecider.toString()); // Todo remove this out
    } catch (Exception e) {
        System.err.println("Error building region decider");
        // NOTE(review): println(e.getStackTrace()) prints the array's toString, not the
        // trace — e.printStackTrace() was probably intended.
        System.err.println(e.getStackTrace());
        System.err.println(e.getMessage());
        System.exit(123452345);
    }
    System.out.println("Determine Subsets...");
    this.subsets = determineSubsets(currentStore, regionDecider);
    // Determine the performance of the BASE classifier for each of those subsets
    //this.basePerfOnSubset = determineBasePerformanceOnSubsets(this.subsets, baseClassifier);
    // Create individual models for the subsets
    this.regionPatches = createPatches(this.subsets, this.basePerfOnSubset);
    System.out.println("Region Decision Subsets: " + subsets.size());
    //}
    // System.exit(18567820);
    System.out.println("##############################\n\n\n");
}
From source file:moa.tud.ke.patching.InstanceStore.java
public void cleanBatch(int index, int size) { Instances inst = getBatch(index); System.out.println("Size Batch: " + inst.size()); while (inst.size() > size) { inst.delete(0);// w w w .j av a 2 s.com } System.out.println("Size Batch: " + inst.size()); }
From source file:moa.tud.ke.patching.Patching.java
/**
 * Starts the update phase and executes multiple steps for the update such
 * as learning the error regions and building patches for them.
 *
 * @param data new batch of instances to merge into the instance store and learn from
 */
public void updateClassifier(Instances data) {
    System.out.println("########## UPDATE PHASE ############");
    this.updates++;
    // First: merge the new instances to the "Instance Store"
    this.instanceStore.addInstances(data);
    Instances currentStore = this.instanceStore.getInstances();
    System.out.println("Update at Instance: " + this.numInstances + " | Size of Instance store (updates:"
            + this.updates + "): " + currentStore.size());
    // Turn the instances into a binary learning problem to learn the decision space where the original classifier was wrong
    this.reDefinedClasses = redefineProblem(currentStore);
    // Now: learn the error regions with a specially adapted or a normal classifier:
    try {
        this.regionDecider = new DSALearnerWrapper(getDecisionSpaceClassifier());
        regionDecider.buildClassifier(reDefinedClasses);
    } catch (Exception e) {
        System.err.println("Error building region decider");
        // NOTE(review): println(e.getStackTrace()) prints the array's toString, not the
        // trace — e.printStackTrace() was probably intended.
        System.err.println(e.getStackTrace());
        System.err.println(e.getMessage());
    }
    // Optional: add the original prediction as an additional attribute:
    // NOTE(review): here the base-class attribute is added AFTER redefineProblem(),
    // whereas AdaptivePatchingAdwin adds it BEFORE — confirm which ordering is intended.
    if (this.useBaseClassAsAttribute.isSet()) {
        currentStore = addBaseClassToInstances(currentStore);
    }
    // Determine the subsets of instances which are covered by the rules (that are not the default rule)
    this.subsets = determineSubsets(currentStore, regionDecider);
    System.out.println("Region Decision Subsets: " + subsets.size());
    // Create individual models for the subsets
    this.regionPatches = createPatches(this.subsets, this.basePerfOnSubset);
    System.out.println("##############################\n\n\n");
}