Example usage for weka.core Instances size

List of usage examples for weka.core Instances size

Introduction

On this page you can find example usage for weka.core.Instances.size().

Prototype


@Override
public int size() 

Document

Returns the number of instances in the dataset.
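
For orientation, here is a minimal, self-contained sketch (written for this page, not taken from any of the projects below, and assuming the Weka 3.7+ API in which Instances implements java.util.List) that builds a small in-memory dataset and reads its size():

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesSizeDemo {
    public static void main(String[] args) {
        // Two numeric attributes and an empty dataset with initial capacity 0
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(new Attribute("x"));
        attributes.add(new Attribute("y"));
        Instances data = new Instances("demo", attributes, 0);

        System.out.println(data.size()); // 0 - no instances yet

        // size() grows as instances are added and matches numInstances()
        data.add(new DenseInstance(1.0, new double[] { 1.5, 2.5 }));
        System.out.println(data.size() == data.numInstances()); // true, both are 1
    }
}

In the examples below, size() appears as a loop bound, an array capacity, and an index for the most recently added instance.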

Usage

From source file:meddle.PredictByDomainOS.java

License:Open Source License

private static boolean predictOneFlow(String line, String domainOS) {
    if (!domainOSModel.containsKey(domainOS))
        return false;
    else {
        try {
            Classifier classifier = domainOSModel.get(domainOS);
            Map<String, Integer> fi = domainOSFeature.get(domainOS);
            Instances structure = domainOSStruct.get(domainOS);
            Instance current = getInstance(line, fi, fi.size());

            Instances is = new Instances(structure);
            is.setClassIndex(is.numAttributes() - 1);
            is.add(current);
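            // size() - 1 is the index of the copy of the instance that was just added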
            current = is.get(is.size() - 1);
            current.setClassMissing();
            double predicted = classifier.classifyInstance(current);
            if (predicted > 0) {
                return true;
            } else
                return false;
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return false;
}

From source file:mlda.util.Utils.java

License:Open Source License

/**
 * Get array of ImbalancedFeature with labels frequency
 *
 * @param dataset Multi-label dataset
 * @return Array of ImbalancedFeature with the labels frequency
 */
public static ImbalancedFeature[] getAppearancesPerLabel(MultiLabelInstances dataset) {
    int[] labelIndices = dataset.getLabelIndices();

    ImbalancedFeature[] labels = new ImbalancedFeature[labelIndices.length];

    Instances instances = dataset.getDataSet();

    int appearances = 0;
    Attribute currentAtt;

    for (int i = 0; i < labelIndices.length; i++) {
        currentAtt = instances.attribute(labelIndices[i]);
        appearances = 0;

        for (int j = 0; j < instances.size(); j++) {
            if (instances.instance(j).value(currentAtt) == 1.0) {
                appearances++;
            }
        }
        labels[i] = new ImbalancedFeature(currentAtt.name(), appearances);
    }

    return labels;
}

From source file:mlda.util.Utils.java

License:Open Source License

/**
 * Calculate IRs of the ImbalancedFeatures
 *
 * @param dataset Multi-label dataset
 * @param labels Labels of the dataset as ImbalancedFeature objects
 * @return Array of ImbalancedFeature objects with calculated IR
 */
public static ImbalancedFeature[] getImbalancedWithIR(MultiLabelInstances dataset, ImbalancedFeature[] labels) {
    int[] labelIndices = dataset.getLabelIndices();

    ImbalancedFeature[] labels_imbalanced = new ImbalancedFeature[labelIndices.length];

    Instances instances = dataset.getDataSet();

    int nOnes = 0, nZeros = 0, maxAppearance = 0;
    double IRIntraClass;
    double variance;
    double IRInterClass;
    double mean = dataset.getNumInstances() / 2.0; // floating-point division so the mean is not truncated

    Attribute current;
    ImbalancedFeature currentLabel;

    for (int i = 0; i < labelIndices.length; i++) //for each label
    {
        nZeros = 0;
        nOnes = 0;
        current = instances.attribute(labelIndices[i]); //current label

        for (int j = 0; j < instances.size(); j++) //for each instance
        {
            if (instances.instance(j).value(current) == 1.0) {
                nOnes++;
            } else {
                nZeros++;
            }
        }

        try {
            if (nZeros == 0 || nOnes == 0) {
                IRIntraClass = 0;
            } else if (nZeros > nOnes) {
                IRIntraClass = (double) nZeros / nOnes;
            } else {
                IRIntraClass = (double) nOnes / nZeros;
            }
        } catch (Exception e1) {
            IRIntraClass = 0;
        }

        variance = (Math.pow((nZeros - mean), 2) + Math.pow((nOnes - mean), 2)) / 2;

        currentLabel = getLabelByName(current.name(), labels);

        maxAppearance = labels[0].getAppearances();

        if (currentLabel.getAppearances() <= 0) {
            IRInterClass = Double.NaN;
        } else {
            IRInterClass = (double) maxAppearance / currentLabel.getAppearances();
        }

        labels_imbalanced[i] = new ImbalancedFeature(current.name(), currentLabel.getAppearances(),
                IRInterClass, IRIntraClass, variance);
    }

    return labels_imbalanced;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Use inclusion probability to discover the cluster "nearest" the provided instance
 *
 * @param D instance set to sort from
 * @param x instance in question
 * @return sorted set of clusters, ordered by inclusion probability
 */
protected final NearestInstanceTuple[] findNearestNeighbors(Instances D, Instance x) {
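    // Allocate one tuple per instance in D; size() gives the instance count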
    NearestInstanceTuple[] ret = new NearestInstanceTuple[D.size()];
    double[] xVals = x.toDoubleArray();
    int idx = 0;
    for (Instance n : D) {
        ret[idx++] = new NearestInstanceTuple(n, VectorDistances.distance(xVals, n.toDoubleArray(), D,
                this.distanceStrategyOption.getChosenIndex()));
    } // end for
    Arrays.parallelSort(ret);
    return ret;
}

From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java

License:Open Source License

/**
 *
 * @param testInstances instance set to evaluate accuracy
 * @return number of instances actually tested
 */
private int test(Instances testInstances) {
    this.monitor.setCurrentActivityDescription("Testing Instances");
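    // size() of the test set is returned as the number of instances evaluated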
    int ret = testInstances.size();
    int novelClassLabel = testInstances.numClasses();
    int outlierLabel = novelClassLabel + 1;

    // For latent label outliers that have reached their deadline, we must now make a decision:
    while (!this.pendingFinalLabelInstQueue.isEmpty()
            && this.pendingFinalLabelInstQueue.peek().deadline <= this.instancesProcessed) {
        TimeBoxedInstance ti = this.pendingFinalLabelInstQueue.pop();
        int y = (int) ti.inst.classValue();
        double[] prediction = null;
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            Instance novelInst = (Instance) ti.inst.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            novelInst.setWeight(NOVEL_WEIGHT);
            prediction = learner.getVotesForInstance(novelInst);
            evaluator.addResult(novelInst, prediction); // Outlier out of time. Remove it
        } else {
            prediction = learner.getVotesForInstance(ti.inst);
            evaluator.addResult(ti.inst, prediction); // Outlier out of time. Remove it
        }

        this.cm.add(weka.core.Utils.maxIndex(prediction), ti.inst.classValue());
    }

    // Run accuracy test for current instance(s)
    for (Instance i : testInstances) {
        int y = (int) i.classValue();
        double[] prediction = null;
        Instance instToActuallyPredict = i;
        // If novel, make a special instance
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            instToActuallyPredict = (Instance) i.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR); // WARNING - this crashes other algorithms if not also done on training!
            instToActuallyPredict.setWeight(NOVEL_WEIGHT);
        }
        prediction = learner.getVotesForInstance(instToActuallyPredict);
        if ((prediction.length > outlierLabel) && (prediction[outlierLabel] > (1.0 / prediction.length))) {
            this.pendingFinalLabelInstQueue.add(new TimeBoxedInstance(i, this.instancesProcessed,
                    this.labelDeadlineOption.getValue(), prediction)); // Delay accuracy metrics until stale time
        } else {
            evaluator.addResult(instToActuallyPredict, prediction); // Not an outlier, so treat it like normal
            this.cm.add(weka.core.Utils.maxIndex(prediction), i.classValue());
        }
    } // end for

    assert this.pendingFinalLabelInstQueue.size() < (this.labelDeadlineOption.getValue()
            + 1) : "Cache 'pendingFinalLabelInstQueue' is larger than designed.";
    return ret;
}

From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java

public void updateClassifier(Instances data) {

    System.out.println("########## UPDATE PHASE ############");
    this.updates++;

    // Compute the performance and fill the ADWIN window
    System.out.println("Data size: " + data.size());
    System.out.println("Determine Performance...");
    determinePerformance(data, baseClassifier);

    accuracy_array.add((double) counter_one / batchSize.getValue());
    if (change) {
        accuracy_array.clear();
    }

    if (!adwinOnly.isSet()) {
        if (variance != 0) {
            old_variance = variance;
        }

        variance = 0;
        average = 0;

        if (accuracy_array.size() > 2) {
            for (int i = 0; i < accuracy_array.size(); i++) {
                average += accuracy_array.get(i);
            }
            average = average / accuracy_array.size();

            for (int i = 0; i < accuracy_array.size(); i++) {
                variance += Math.pow(average - accuracy_array.get(i), 2);
            }
            variance = variance / accuracy_array.size();
        }

        System.out.println("Varianz: " + variance);
        System.out.println("epsilon: " + real_variance_epsilon);

        if (change) {
            maxBatchesToKeep = batchesToKeep.getValue();
        }
    }

    if ((this.ADError.getWidth() / batchSize.getValue()) < 1) {
        this.instanceStore.setNumBatches(1);
    } else {
        if (adwinOnly.isSet()) {
            this.instanceStore.setNumBatches(this.ADError.getWidth() / batchSize.getValue());
        } else {
            if (Math.abs((double) variance - old_variance) < (double) real_variance_epsilon) {
                maxBatchesToKeep = this.instanceStore.numBatches;
                System.out.println("Batchsize wird nicht erhht wegen Varianz");
            }
            this.instanceStore
                    .setNumBatches(Math.min(maxBatchesToKeep, this.ADError.getWidth() / batchSize.getValue()));
            //this.instanceStore.setNumBatches(this.ADError.getWidth() / batchSize.getValue());
        }
    }

    // if the window size has reached its maximum, set change back to "false"
    if (this.instanceStore.numBatches == batchesToKeep.getValue()) {
        //change = false;
    }

    // First: merge the new instances to the "Instance Store"
    this.instanceStore.addInstances(data);
    System.out.println("size ADWIN: " + this.ADError.getWidth());
    System.out.println("size InstanceStore: " + this.instanceStore.numBatches);
    Instances currentStore = this.instanceStore.getInstances();

    System.out.println("Update at Instance: " + this.numInstances + " | Size of Instance store (updates:"
            + this.updates + "): " + currentStore.size());

    // Turn the instances into a binary learning problem to learn the decision space where the original classifier was wrong
    //writeArff("C:\\StAtIC\\experiments\\orig.arff", currentStore);

    //writeArff("C:\\StAtIC\\experiments\\modded.arff", this.reDefinedClasses);
    //        System.exit(9525356);

    // Determine the subsets of instances which are covered by the rules (that are not the default rule)

    if (this.useBaseClassAsAttribute.isSet()) {
        currentStore = addBaseClassToInstances(currentStore);
    }

    //if(change) {

    System.out.println("Redefine Problem...");
    this.reDefinedClasses = redefineProblem(currentStore);

    // Now: learn the error regions with a specially adapted or a normal classifier:
    try {
        System.out.println("Build Classifier...");
        this.regionDecider = new DSALearnerWrapper(getDecisionSpaceClassifier());
        regionDecider.buildClassifier(reDefinedClasses);

        //            System.out.println("Error Space Classifier:"); System.out.println(regionDecider.toString());       // Todo remove this out
    } catch (Exception e) {
        System.err.println("Error building region decider");
        e.printStackTrace(); // getStackTrace() would only print the array reference, not the trace
        System.err.println(e.getMessage());
        System.exit(123452345);
    }

    System.out.println("Determine Subsets...");
    this.subsets = determineSubsets(currentStore, regionDecider);

    // Determine the performance of the BASE classifier for each of those subsets
    //this.basePerfOnSubset = determineBasePerformanceOnSubsets(this.subsets, baseClassifier);

    // Create individual models for the subsets
    this.regionPatches = createPatches(this.subsets, this.basePerfOnSubset);
    System.out.println("Region Decision Subsets: " + subsets.size());

    //}

    //        System.exit(18567820);
    System.out.println("##############################\n\n\n");
}

From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java

/**
 * Learns a specific subset classifier (of the same type as the base
 * classifier) to improve accuracy on the regions that performed badly before.
 *
 * @param subsets
 * @param basePerformance
 * @return
 */
private Vector createPatches(Vector subsets, Vector basePerformance) {
    Vector patches = new Vector();

    System.out.println("Creating patches: #" + subsets.size());
    try {
        for (int d = 0; d < subsets.size(); d++) {

            Instances set = (Instances) subsets.get(d);

            //                if(this.useBaseClassAsAttribute.isSet()) {
            //                        writeArff("C:\\StAtIC\\experiments\\set"+d+".arff", set);
            //                    }
            //                System.out.println("Set " + d + " size: " + set.size());
            Classifier patch;
            if (set.size() < 5) // Too small to do anything properly
            {
                patch = null; // null will then default to base classifier
            } else {

                patch = getPatchClassifier();
                patch.buildClassifier(set);
            }

            patches.add(d, patch);
        }
    } catch (Exception e) {
        System.err.println("Error building patches:");
        System.err.println(e.getMessage());
    }

    //        System.out.println("\n--- Patches ------------");
    //        for (int i = 0; i < patches.size(); i++) {
    //            Classifier tmp = (Classifier) patches.get(i);
    //            if (tmp != null) {
    //                System.out.print("Patch " + i+" - ");
    //                System.out.println(tmp);
    //            }
    //        }
    //        System.out.println("------------------------");
    //        System.exit(45768545);
    return patches;
}

From source file:moa.tud.ke.patching.AdaptivePatchingTwoAdwins.java

public void updateClassifier(Instances data) {

    System.out.println("########## UPDATE PHASE ############");
    this.updates++;

    // Compute the performance and fill the ADWIN window
    System.out.println("Data size: " + data.size());
    System.out.println("Determine Performance...");
    determinePerformance(data, baseClassifier);

    if ((this.ADError.getWidth() / batchSize.getValue()) < 1) {
        this.instanceStore.setNumBatches(1);
    } else {
        if (change) {
            corrected_adwin_size = this.ADError.getWidth() / batchSize.getValue() - 1;
        }
        if (!changeFine) {
            maxBatchesToKeep = this.instanceStore.numBatches;
            corrected_adwin_size++;
        } else {
            maxBatchesToKeep = Math.max(1,
                    this.ADError.getWidth() / batchSize.getValue() - corrected_adwin_size);
        }
        this.instanceStore
                .setNumBatches(Math.min(maxBatchesToKeep, this.ADError.getWidth() / batchSize.getValue()));
        //this.instanceStore.setNumBatches(this.ADError.getWidth() / batchSize.getValue());
    }

    // if the window size has reached its maximum, set change back to "false"
    if (this.instanceStore.numBatches == batchesToKeep.getValue()) {
        //change = false;
    }

    // First: merge the new instances to the "Instance Store"
    this.instanceStore.addInstances(data);
    System.out.println("size ADWIN: " + this.ADError.getWidth());
    System.out.println("size InstanceStore: " + this.instanceStore.numBatches);
    Instances currentStore = this.instanceStore.getInstances();

    System.out.println("Update at Instance: " + this.numInstances + " | Size of Instance store (updates:"
            + this.updates + "): " + currentStore.size());

    // Turn the instances into a binary learning problem to learn the decision space where the original classifier was wrong
    //writeArff("C:\\StAtIC\\experiments\\orig.arff", currentStore);

    //writeArff("C:\\StAtIC\\experiments\\modded.arff", this.reDefinedClasses);
    //        System.exit(9525356);

    // Determine the subsets of instances which are covered by the rules (that are not the default rule)

    if (this.useBaseClassAsAttribute.isSet()) {
        currentStore = addBaseClassToInstances(currentStore);
    }

    //if(change) {

    System.out.println("Redefine Problem...");
    this.reDefinedClasses = redefineProblem(currentStore);

    // Now: learn the error regions with a specially adapted or a normal classifier:
    try {
        System.out.println("Build Classifier...");
        this.regionDecider = new DSALearnerWrapper(getDecisionSpaceClassifier());
        regionDecider.buildClassifier(reDefinedClasses);

        //            System.out.println("Error Space Classifier:"); System.out.println(regionDecider.toString());       // Todo remove this out
    } catch (Exception e) {
        System.err.println("Error building region decider");
        e.printStackTrace(); // getStackTrace() would only print the array reference, not the trace
        System.err.println(e.getMessage());
        System.exit(123452345);
    }

    System.out.println("Determine Subsets...");
    this.subsets = determineSubsets(currentStore, regionDecider);

    // Determine the performance of the BASE classifier for each of those subsets
    //this.basePerfOnSubset = determineBasePerformanceOnSubsets(this.subsets, baseClassifier);

    // Create individual models for the subsets
    this.regionPatches = createPatches(this.subsets, this.basePerfOnSubset);
    System.out.println("Region Decision Subsets: " + subsets.size());

    //}

    //        System.exit(18567820);
    System.out.println("##############################\n\n\n");
}

From source file:moa.tud.ke.patching.InstanceStore.java

public void cleanBatch(int index, int size) {
    Instances inst = getBatch(index);
    System.out.println("Size Batch: " + inst.size());
    while (inst.size() > size) {
        inst.delete(0);
    }
    System.out.println("Size Batch: " + inst.size());
}

From source file:moa.tud.ke.patching.Patching.java

/**
 * Starts the update phase and executes multiple steps for the update such
 * as learning the error regions and building patches for them.
 *
 * @param data
 */
public void updateClassifier(Instances data) {

    System.out.println("########## UPDATE PHASE ############");
    this.updates++;

    // First: merge the new instances to the "Instance Store"
    this.instanceStore.addInstances(data);
    Instances currentStore = this.instanceStore.getInstances();

    System.out.println("Update at Instance: " + this.numInstances + " | Size of Instance store (updates:"
            + this.updates + "): " + currentStore.size());

    // Turn the instances into a binary learning problem to learn the decision space where the original classifier was wrong
    this.reDefinedClasses = redefineProblem(currentStore);

    // Now: learn the error regions with a specially adapted or a normal classifier:
    try {
        this.regionDecider = new DSALearnerWrapper(getDecisionSpaceClassifier());
        regionDecider.buildClassifier(reDefinedClasses);
    } catch (Exception e) {
        System.err.println("Error building region decider");
        e.printStackTrace(); // getStackTrace() would only print the array reference, not the trace
        System.err.println(e.getMessage());
    }

    // Optional: add the original prediction as an additional attribute:
    if (this.useBaseClassAsAttribute.isSet()) {
        currentStore = addBaseClassToInstances(currentStore);
    }

    // Determine the subsets of instances which are covered by the rules (that are not the default rule)
    this.subsets = determineSubsets(currentStore, regionDecider);
    System.out.println("Region Decision Subsets: " + subsets.size());

    // Create individual models for the subsets
    this.regionPatches = createPatches(this.subsets, this.basePerfOnSubset);

    System.out.println("##############################\n\n\n");
}