Example usage for weka.core Instance classValue

List of usage examples for weka.core Instance classValue

Introduction

On this page you can find example usages of the weka.core Instance.classValue method.

Prototype

public double classValue();

Source Link

Document

Returns an instance's class value as a floating-point number.

Usage

From source file:moa.tasks.EvaluatePrequentialRegression.java

License: Open Source License

/**
 * Evaluates a regression learner prequentially (interleaved test-then-train):
 * each instance drawn from the stream is first used to test the model, then to
 * train it, while a learning curve of evaluation, timing, and RAM-hour
 * measurements is recorded.
 *
 * @param monitor    task monitor used for progress reporting and abort checks
 * @param repository object repository (unused here; required by the task API)
 * @return the collected LearningCurve, or null when the task was aborted
 */
@Override
protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
    Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
    InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
    RegressionPerformanceEvaluator evaluator = (RegressionPerformanceEvaluator) getPreparedClassOption(
            this.evaluatorOption);
    LearningCurve learningCurve = new LearningCurve("learning evaluation instances");

    // Deprecated evaluator configurations: warn and return an empty curve.
    if (evaluator instanceof WindowClassificationPerformanceEvaluator && widthOption.getValue() != 1000) {
        System.out.println(
                "DEPRECATED! Use EvaluatePrequential -e (WindowClassificationPerformanceEvaluator -w "
                        + widthOption.getValue() + ")");
        return learningCurve;
    }
    if (evaluator instanceof EWMAClassificationPerformanceEvaluator && alphaOption.getValue() != .01) {
        System.out.println(
                "DEPRECATED! Use EvaluatePrequential -e (EWMAClassificationPerformanceEvaluator -a "
                        + alphaOption.getValue() + ")");
        return learningCurve;
    }
    if (evaluator instanceof FadingFactorClassificationPerformanceEvaluator && alphaOption.getValue() != .01) {
        System.out.println(
                "DEPRECATED! Use EvaluatePrequential -e (FadingFactorClassificationPerformanceEvaluator -a "
                        + alphaOption.getValue() + ")");
        return learningCurve;
    }

    learner.setModelContext(stream.getHeader());
    int maxInstances = this.instanceLimitOption.getValue();
    long instancesProcessed = 0;
    int maxSeconds = this.timeLimitOption.getValue();
    int secondsElapsed = 0;
    monitor.setCurrentActivity("Evaluating learner...", -1.0);

    PrintStream immediateResultStream = openResultStream(this.dumpFileOption.getFile(),
            "Unable to open immediate result file: ");
    PrintStream outputPredictionResultStream = openResultStream(this.outputPredictionFileOption.getFile(),
            "Unable to open prediction result file: ");
    try {
        boolean firstDump = true;
        boolean preciseCPUTiming = TimingUtils.enablePreciseTiming();
        long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
        long lastEvaluateStartTime = evaluateStartTime;
        double RAMHours = 0.0;
        while (stream.hasMoreInstances() && ((maxInstances < 0) || (instancesProcessed < maxInstances))
                && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) {
            Instance trainInst = stream.nextInstance();
            Instance testInst = (Instance) trainInst.copy();
            double trueClass = trainInst.classValue();
            // Prequential order: predict first, train on the instance afterwards.
            double[] prediction = learner.getVotesForInstance(testInst);
            if (outputPredictionResultStream != null) {
                // Guard against learners that return an empty vote array.
                double predicted = prediction.length > 0 ? prediction[0] : 0.0;
                outputPredictionResultStream.println(predicted + "," + trueClass);
            }

            evaluator.addResult(testInst, prediction);
            learner.trainOnInstance(trainInst);
            instancesProcessed++;
            // Record a learning-curve entry every sampleFrequency instances and at end of stream.
            if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0
                    || stream.hasMoreInstances() == false) {
                long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
                double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime);
                double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime);
                // RAM-Hours: model size in GB multiplied by elapsed hours since last sample.
                double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs
                RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours
                RAMHours += RAMHoursIncrement;
                lastEvaluateStartTime = evaluateTime;
                learningCurve.insertEntry(new LearningEvaluation(
                        new Measurement[] {
                                new Measurement("learning evaluation instances", instancesProcessed),
                                new Measurement("evaluation time (" + (preciseCPUTiming ? "cpu " : "") + "seconds)",
                                        time),
                                new Measurement("model cost (RAM-Hours)", RAMHours) },
                        evaluator, learner));

                if (immediateResultStream != null) {
                    if (firstDump) {
                        immediateResultStream.println(learningCurve.headerToString());
                        firstDump = false;
                    }
                    immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
                    immediateResultStream.flush();
                }
            }
            // Periodically refresh the monitor and honour abort/preview requests.
            if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
                if (monitor.taskShouldAbort()) {
                    return null;
                }
                long estimatedRemainingInstances = stream.estimatedRemainingInstances();
                if (maxInstances > 0) {
                    long maxRemaining = maxInstances - instancesProcessed;
                    if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                        estimatedRemainingInstances = maxRemaining;
                    }
                }
                monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
                        : (double) instancesProcessed
                                / (double) (instancesProcessed + estimatedRemainingInstances));
                if (monitor.resultPreviewRequested()) {
                    monitor.setLatestResultPreview(learningCurve.copy());
                }
                secondsElapsed = (int) TimingUtils
                        .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - evaluateStartTime);
            }
        }
        return learningCurve;
    } finally {
        // Always release the result streams — the previous version leaked both
        // on the abort path (return null) and on any exception.
        if (immediateResultStream != null) {
            immediateResultStream.close();
        }
        if (outputPredictionResultStream != null) {
            outputPredictionResultStream.close();
        }
    }
}

/**
 * Opens an auto-flushing PrintStream over the given file, appending when the
 * file already exists; returns null when no file was configured.
 *
 * @param file               the target file, possibly null
 * @param errorMessagePrefix prefix for the RuntimeException message on failure
 * @return an open PrintStream, or null when file is null
 */
private static PrintStream openResultStream(File file, String errorMessagePrefix) {
    if (file == null) {
        return null;
    }
    try {
        // Append when the file exists so repeated runs accumulate results.
        return new PrintStream(new FileOutputStream(file, file.exists()), true);
    } catch (Exception ex) {
        throw new RuntimeException(errorMessagePrefix + file, ex);
    }
}

From source file:myclassifier.MyC45.java

/**
 * Recursively builds the C4.5 decision tree for the given training data,
 * choosing at each node the attribute with the highest gain ratio.
 *
 * @param data the training data reaching this node
 * @exception Exception if the decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // No instances reached this node: mark it as an empty leaf.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = -1; // stand-in for a missing class value
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Score every candidate attribute (class attribute excluded) by gain ratio.
    double[] ratios = new double[data.numAttributes()];
    Enumeration candidates = data.enumerateAttributes();
    while (candidates.hasMoreElements()) {
        Attribute candidate = (Attribute) candidates.nextElement();
        ratios[candidate.index()] = computeGainRatio(data, candidate);
    }
    m_Attribute = data.attribute(Utils.maxIndex(ratios));

    if (!Utils.eq(ratios[m_Attribute.index()], 0)) {
        // A positive gain ratio: branch on the chosen attribute and recurse.
        Instances[] partitions = splitData(data, m_Attribute);
        m_Successors = new MyC45[m_Attribute.numValues()];
        for (int v = 0; v < m_Attribute.numValues(); v++) {
            m_Successors[v] = new MyC45();
            m_Successors[v].makeTree(partitions[v]);
        }
    } else {
        // No attribute is informative: make a leaf holding the normalized
        // class distribution and the majority class.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration insts = data.enumerateInstances();
        while (insts.hasMoreElements()) {
            Instance inst = (Instance) insts.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    }
}

From source file:myclassifier.MyC45.java

/**
 * Computes the entropy (in bits) of the class distribution of the data set:
 * -sum_i p_i * log2(p_i) over the observed class frequencies.
 *
 * @param data the data set to measure
 * @return the entropy of the class distribution
 * @exception Exception if something goes wrong
 */
private double computeEntropyFromData(Instances data) throws Exception {
    double[] counts = new double[data.numClasses()];
    Enumeration insts = data.enumerateInstances();
    while (insts.hasMoreElements()) {
        Instance inst = (Instance) insts.nextElement();
        counts[(int) inst.classValue()]++;
    }

    double entropy = 0;
    for (int c = 0; c < data.numClasses(); c++) {
        if (counts[c] > 0) {
            // Only non-empty classes contribute; p * log2(p) -> 0 as p -> 0.
            entropy -= (double) (counts[c] / data.numInstances())
                    * Utils.log2((double) counts[c] / data.numInstances());
        }
    }
    return entropy;
}

From source file:myclassifier.myC45Pack.ClassDistribution.java

/**
 * Adds instance to subDataset./* ww  w .j av  a 2 s  .  com*/
 *
 * @exception Exception if something goes wrong
 */
public void addInstance(int subDatasetIndex, Instance instance) throws Exception {

    int classIndex = (int) instance.classValue();
    double weight = instance.weight();

    w_perClassPerSubdataset[subDatasetIndex][classIndex] = w_perClassPerSubdataset[subDatasetIndex][classIndex]
            + weight;
    w_perSubdataset[subDatasetIndex] = w_perSubdataset[subDatasetIndex] + weight;
    w_perClass[classIndex] = w_perClass[classIndex] + weight;
    totalWeights = totalWeights + weight;
}

From source file:myclassifier.myC45Pack.ClassDistribution.java

/**
 * Adds all instances with unknown values for the given attribute, spreading
 * each one's weight across the bags in proportion to the bags' current
 * relative sizes (uniformly if nothing has been added yet).
 *
 * @param dataSet the data set to scan for missing values
 * @param attIndex index of the attribute whose missing values trigger adding
 * @exception Exception if something goes wrong
 */
public void addInstWithMissValue(Instances dataSet, int attIndex) throws Exception {

    // Relative size of each bag, used to split a missing-value instance's weight.
    double[] bagProbs = new double[w_perSubdataset.length];
    for (int bag = 0; bag < bagProbs.length; bag++) {
        if (totalWeights == 0) {
            bagProbs[bag] = 1.0 / bagProbs.length;
        } else {
            bagProbs[bag] = w_perSubdataset[bag] / totalWeights;
        }
    }

    Enumeration insts = dataSet.enumerateInstances();
    while (insts.hasMoreElements()) {
        Instance inst = (Instance) insts.nextElement();
        if (!inst.isMissing(attIndex)) {
            continue; // only instances missing this attribute are added
        }
        int classIndex = (int) inst.classValue();
        double weight = inst.weight();
        w_perClass[classIndex] += weight;
        totalWeights += weight;
        for (int bag = 0; bag < bagProbs.length; bag++) {
            double share = bagProbs[bag] * weight;
            w_perClassPerSubdataset[bag][classIndex] += share;
            w_perSubdataset[bag] += share;
        }
    }
}

From source file:myclassifier.myC45Pack.ClassDistribution.java

/**
 * Adds every instance with index in [startIndex, lastIndex) of the data set
 * to the given bag, updating per-class and total weight tallies.
 *
 * @param subDatasetIndex index of the bag receiving the instances
 * @param dataSet the data set the instances are drawn from
 * @param startIndex first index to add (inclusive)
 * @param lastIndex last index (exclusive)
 * @exception Exception if something goes wrong
 */
public final void addRange(int subDatasetIndex, Instances dataSet, int startIndex, int lastIndex)
        throws Exception {

    double rangeWeight = 0;
    for (int i = startIndex; i < lastIndex; i++) {
        Instance inst = (Instance) dataSet.instance(i);
        int classIndex = (int) inst.classValue();
        double w = inst.weight();
        rangeWeight += w;
        w_perClassPerSubdataset[subDatasetIndex][classIndex] += w;
        w_perClass[classIndex] += w;
    }
    w_perSubdataset[subDatasetIndex] += rangeWeight;
    totalWeights += rangeWeight;
}

From source file:myclassifier.myC45Pack.ClassDistribution.java

/**
 * Adds the given instance to every bag, scaling its weight by the
 * corresponding per-bag weighting factor.
 *
 * @param instance the instance to distribute
 * @param weights per-bag weighting factors (same length as the bag array)
 * @exception Exception if something goes wrong
 */
public void addWeights(Instance instance, double[] weights) throws Exception {

    final int classIndex = (int) instance.classValue();
    for (int bag = 0; bag < w_perSubdataset.length; bag++) {
        double share = instance.weight() * weights[bag];
        w_perClassPerSubdataset[bag][classIndex] += share;
        w_perSubdataset[bag] += share;
        w_perClass[classIndex] += share;
        totalWeights += share;
    }
}

From source file:myclassifier.myC45Pack.ClassDistribution.java

/**
 * Moves every instance with index in [startIndex, lastIndex) of the data set
 * from one bag to another, transferring each instance's weight between the
 * two bags' tallies (per-class and totals stay unchanged overall).
 *
 * @param from index of the bag the weight is removed from
 * @param to index of the bag the weight is added to
 * @param dataSet the data set the instances are drawn from
 * @param startIndex first index to move (inclusive)
 * @param lastIndex last index (exclusive)
 * @exception Exception if something goes wrong
 */
public void moveInstancesWithRange(int from, int to, Instances dataSet, int startIndex, int lastIndex)
        throws Exception {

    for (int i = startIndex; i < lastIndex; i++) {
        Instance inst = (Instance) dataSet.instance(i);
        int classIndex = (int) inst.classValue();
        double w = inst.weight();
        w_perClassPerSubdataset[from][classIndex] -= w;
        w_perClassPerSubdataset[to][classIndex] += w;
        w_perSubdataset[from] -= w;
        w_perSubdataset[to] += w;
    }
}

From source file:myID3.MyId3.java

/**
 * Recursively constructs the ID3 tree from the given instances, branching at
 * each node on the attribute with the highest information gain.
 *
 * @param data the training instances reaching this node
 */
public void buildTree(Instances data) {
    if (data.numInstances() == 0) {
        // Empty partition: leaf with a missing class value.
        classAttribute = null;
        classValue = Utils.missingValue();
        classDistribution = new double[data.numClasses()];
        return;
    }

    // Information gain of every non-class attribute.
    double[] gains = new double[data.numAttributes()];
    Enumeration atts = data.enumerateAttributes();
    while (atts.hasMoreElements()) {
        Attribute att = (Attribute) atts.nextElement();
        gains[att.index()] = informationGain(data, att);
    }
    currentAttribute = data.attribute(maxIndex(gains));

    if (Utils.eq(gains[currentAttribute.index()], 0)) {
        // No attribute is informative: leaf labelled with the majority class.
        currentAttribute = null;
        classDistribution = new double[data.numClasses()];
        Enumeration insts = data.enumerateInstances();
        while (insts.hasMoreElements()) {
            Instance inst = (Instance) insts.nextElement();
            classDistribution[(int) inst.classValue()]++;
        }
        Utils.normalize(classDistribution);
        classValue = Utils.maxIndex(classDistribution);
        classAttribute = data.classAttribute();
    } else {
        // Branch on the chosen attribute and grow one subtree per value.
        Instances[] partitions = splitDataByAttribute(data, currentAttribute);
        nodes = new MyId3[currentAttribute.numValues()];
        for (int v = 0; v < currentAttribute.numValues(); v++) {
            nodes[v] = new MyId3();
            nodes[v].buildTree(partitions[v]);
        }
    }
}

From source file:myID3.MyId3.java

/**
 * Computes the entropy (in bits) of the class distribution of the given data
 * set: -sum_i p_i * log2(p_i) over the observed class frequencies.
 *
 * @param data the data set to measure
 * @return the entropy of the class distribution; 0 for an empty data set
 */
private double entropy(Instances data) {

    double numInstances = data.numInstances();
    if (numInstances == 0) {
        // The original divided by zero here, yielding NaN; an empty
        // set carries no information, so return 0 instead.
        return 0;
    }

    double[] counts = new double[data.numClasses()];
    Enumeration insts = data.enumerateInstances();
    while (insts.hasMoreElements()) {
        Instance inst = (Instance) insts.nextElement();
        counts[(int) inst.classValue()]++;
    }

    double entropy = 0;
    for (double count : counts) {
        if (count > 0.0) {
            // Only non-empty classes contribute; p * log2(p) -> 0 as p -> 0.
            double p = count / numInstances;
            entropy -= p * Utils.log2(p);
        }
    }
    return entropy;
}