Example usage for weka.core Utils isMissingValue

List of usage examples for weka.core Utils isMissingValue

Introduction

On this page you can find example usages of weka.core Utils isMissingValue.

Prototype

public static boolean isMissingValue(double val) 

Source Link

Document

Tests if the given value codes "missing".

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @param p_unlabeledData the unlabeled instances used by the bagged classifier
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Has user asked to represent copies using weights?
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }

    // get fresh Instances object; both the labeled and unlabeled sets are
    // defensively copied so the caller's data is never modified
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);

    super.buildClassifier(m_data);

    // With a bag size other than 100% the in-bag bookkeeping used below would
    // not cover every training instance, so OOB estimation is refused.
    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    m_random = new Random(m_Seed);

    // Per-classifier in-bag membership; only allocated when the OOB error is
    // wanted. NOTE(review): the inner boolean[] arrays are not filled anywhere
    // in this method -- presumably buildClassifiers() populates them; confirm,
    // otherwise m_inBag[j][i] below will NPE.
    m_inBag = null;
    if (m_CalcOutOfBag)
        m_inBag = new boolean[m_Classifiers.length][];

    // Give each randomizable ensemble member its own derived seed.
    for (int j = 0; j < m_Classifiers.length; j++) {
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt());
        }
    }
    //Insert oracle loop here TODO

    buildClassifiers();
    // Transduction pass: each tree emits its transducted instances into the
    // shared buffer and re-induces on them.
    Instances inst = new Instances(m_data);
    for (int i = 0; i < m_Classifiers.length; i++) {
        inst.clear();
        ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst);
        ((NewTree) m_Classifiers[i]).DoInduction(inst);
        // Ehm, do something boyski
    }
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = m_data.classAttribute().isNumeric();

        for (int i = 0; i < m_data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1]; // single slot for the averaged regression prediction
            else
                votes = new double[m_data.numClasses()]; // one accumulator per class

            // determine predictions for instance; only classifiers whose bag
            // did NOT contain instance i may vote (that is what makes it OOB)
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (m_inBag[j][i])
                    continue;

                if (numeric) {
                    double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i));
                    if (!Utils.isMissingValue(pred)) {
                        votes[0] += pred;
                        voteCount++;
                    }
                } else {
                    voteCount++;
                    double[] newProbs = ((NewTree) m_Classifiers[j])
                            .distributionForInstance(m_data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote": average for regression, arg-max class for classification;
            // missing when no usable prediction was collected
            if (numeric) {
                if (voteCount == 0) {
                    vote = Utils.missingValue();
                } else {
                    vote = votes[0] / voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    vote = Utils.missingValue();
                } else {
                    vote = Utils.maxIndex(votes); // predicted class
                    Utils.normalize(votes);
                }
            }

            // error for instance (weighted)
            if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) {
                outOfBagCount += m_data.instance(i).weight();
                if (numeric) {
                    // NOTE(review): this is a RELATIVE absolute error -- it divides by
                    // the true class value, so any instance with classValue() == 0
                    // produces Infinity/NaN in errorSum. Standard Weka Bagging
                    // accumulates the absolute error only; confirm the division
                    // is intentional.
                    errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue())
                            * m_data.instance(i).weight()) / m_data.instance(i).classValue();
                } else {
                    if (vote != m_data.instance(i).classValue())
                        errorSum += m_data.instance(i).weight();
                }
            }
        }

        if (outOfBagCount > 0) {
            m_OutOfBagError = errorSum / outOfBagCount;
        }
    } else {
        m_OutOfBagError = 0;
    }

    // save memory
    m_data = null;
}

From source file:Bilbo.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if distribution can't be computed successfully 
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    // One accumulator slot per class; for a numeric class only slot 0 is used.
    final double[] dist = new double[instance.numClasses()];
    final boolean numericClass = instance.classAttribute().isNumeric();

    double predictionCount = 0;
    for (int i = 0; i < m_NumIterations; i++) {
        NewTree tree = (NewTree) m_Classifiers[i];
        if (numericClass) {
            // Regression: sum the non-missing member predictions.
            double pred = tree.classifyInstance(instance);
            if (Utils.isMissingValue(pred)) {
                continue;
            }
            dist[0] += pred;
            predictionCount++;
        } else {
            // Classification: sum the per-class probability estimates.
            double[] treeDist = tree.distributionForInstance(instance);
            for (int j = 0; j < treeDist.length; j++) {
                dist[j] += treeDist[j];
            }
        }
    }

    if (numericClass) {
        // Average the predictions; missing when no member produced one.
        dist[0] = (predictionCount == 0) ? Utils.missingValue() : dist[0] / predictionCount;
    } else if (!Utils.eq(Utils.sum(dist), 0)) {
        // Normalize to a proper distribution; an all-zero vector is returned as-is.
        Utils.normalize(dist);
    }
    return dist;
}

From source file:adams.flow.condition.bool.WekaClassification.java

License:Open Source License

/**
 * Returns the index of the case that should get executed.
 * 
 * @param owner   the owning actor
 * @param token   the current token passing through the actor
 * @return      the index, -1 if not available
 */
public int getCaseIndex(Actor owner, Token token) {
    // Lazily build the model when "on the fly" mode is enabled.
    if (m_OnTheFly && (m_Model == null)) {
        String msg = setUpModel(owner);
        if (msg != null) {
            getLogger().severe(msg);
            return -1;
        }
    }

    // Nothing to classify without a payload.
    if ((token == null) || (token.getPayload() == null)) {
        return -1;
    }

    Instance inst = (Instance) token.getPayload();
    if (inst.classIndex() == -1) {
        getLogger().severe("No class set!");
        return -1;
    }
    if (!inst.classAttribute().isNominal()) {
        getLogger().severe("Class attribute is not nominal!");
        return -1;
    }

    try {
        // The predicted nominal class index doubles as the case index.
        double classification = m_Model.classifyInstance(inst);
        return Utils.isMissingValue(classification) ? -1 : (int) classification;
    } catch (Exception e) {
        getLogger().log(Level.SEVERE, "Failed to obtain classification: ", e);
        return -1;
    }
}

From source file:distributed.core.DistributedUtils.java

License:Open Source License

/**
 * Computes summary statistics for one numeric attribute of a sparse data set.
 * In the sparse representation a stored zero denotes an absent entry, so both
 * explicit missing values and zeros are tallied as MISSING.
 *
 * @param denormalized the instances to summarize
 * @param attIndex the index of the numeric attribute
 * @return the populated statistics (with derived metrics computed)
 */
public static NumericStats getNumericAttributeStatsSparse(Instances denormalized, int attIndex) {
    NumericStats ns = new NumericStats(denormalized.attribute(attIndex).name());
    // Hoist the backing array out of the loop -- the original code already
    // relied on getStats() returning the live array (it incremented through it),
    // so fetching it once per instance/metric was pure overhead.
    double[] stats = ns.getStats();

    for (int j = 0; j < denormalized.numInstances(); j++) {
        double value = denormalized.instance(j).value(attIndex);

        if (Utils.isMissingValue(value) || value == 0) {
            stats[ArffSummaryNumericMetric.MISSING.ordinal()]++;
        } else {
            stats[ArffSummaryNumericMetric.COUNT.ordinal()]++;
            stats[ArffSummaryNumericMetric.SUM.ordinal()] += value;
            stats[ArffSummaryNumericMetric.SUMSQ.ordinal()] += value * value;
            if (Double.isNaN(stats[ArffSummaryNumericMetric.MIN.ordinal()])) {
                // First observed value initializes both extremes.
                stats[ArffSummaryNumericMetric.MIN
                        .ordinal()] = stats[ArffSummaryNumericMetric.MAX.ordinal()] = value;
            } else if (value < stats[ArffSummaryNumericMetric.MIN.ordinal()]) {
                stats[ArffSummaryNumericMetric.MIN.ordinal()] = value;
            } else if (value > stats[ArffSummaryNumericMetric.MAX.ordinal()]) {
                stats[ArffSummaryNumericMetric.MAX.ordinal()] = value;
            }
        }
    }

    ns.computeDerived();

    return ns;
}

From source file:en_deep.mlprocess.manipulation.featmodif.FeatureModifierFilter.java

License:Open Source License

/**
 * Retrieves the values for all output attributes relating to the given source attribute.
 * 
 * @param att the source attribute
 * @param attVal the attribute value
 * @param valArr the field where the double values are to be set to missing
 * @param stringValArr the field where the string values are to be stored
 * @param offset the offset where the values for this attribute should begin
 * @return the number of attribute values written
 */
private int getAttributeOutputValue(Attribute att, double attVal, double[] valArr, String[] stringValArr,
        int offset) {

    int pos = offset;
    if (this.m_PreserveOriginals) {
        // Keep the untouched numeric value in the first output slot.
        valArr[pos] = attVal;
        pos++;
    }

    // A missing source value is handed to the operation as null.
    String source = Utils.isMissingValue(attVal) ? null : att.value((int) attVal);
    String[] outVals = this.m_OperClass.getOutputValues(source);

    // Store the string outputs and flag their numeric slots as missing.
    System.arraycopy(outVals, 0, stringValArr, pos, outVals.length);
    for (int i = 0; i < outVals.length; ++i) {
        valArr[pos + i] = Utils.missingValue();
    }

    return outVals.length + (this.m_PreserveOriginals ? 1 : 0);
}

From source file:en_deep.mlprocess.manipulation.SetAwareNominalToBinary.java

License:Open Source License

/**
 * Sets the values for all binary attributes pertaining to the given source attribute with
 * respect to possible multiple values (and normal setting, if {@link #m_DualMode} is enabled).
 * 
 * @param att the source attribute
 * @param value the source value
 * @param vals the field where the values are to be stored
 * @param offset the offset where the values for this attribute should begin
 * @return number of processed output columns
 */
private int setConvertedAttribute(Attribute att, double value, double[] vals, int offset) {

    // Total number of output columns this source attribute produces.
    int totalValues = 0;
    if (m_DualMode || m_SetOnlyPrefix != null && !att.name().startsWith(m_SetOnlyPrefix)) {
        totalValues += att.numValues();
    }
    if (m_SetOnlyPrefix == null || att.name().startsWith(m_SetOnlyPrefix)) {
        totalValues += m_producedAttVals[att.index()].size();
    }

    // A missing input propagates "missing" to every produced column. This is
    // checked BEFORE any att.value() lookup: the original evaluated
    // att.value((int) value) first, which for a missing value ((int) NaN == 0)
    // performed a pointless lookup of value index 0.
    if (Utils.isMissingValue(value)) {
        for (int i = 0; i < totalValues; ++i) {
            vals[offset + i] = value;
        }
        return totalValues;
    }

    // Split the (possibly multi-valued) nominal value into its set members.
    String strVal = att.value((int) value);
    String[] setVals = strVal.split(m_Separator);

    // Normal one-hot encoding of the full value (dual mode / non-set attributes).
    if (m_DualMode || m_SetOnlyPrefix != null && !att.name().startsWith(m_SetOnlyPrefix)) {
        vals[offset + (int) value] = 1;
        offset += att.numValues();
    }
    // Set-aware encoding: flag each member of the value set.
    if (m_SetOnlyPrefix == null || att.name().startsWith(m_SetOnlyPrefix)) {
        for (String setVal : setVals) {
            vals[offset + m_producedAttVals[att.index()].get(setVal)] = 1;
        }
    }
    return totalValues;
}

From source file:moa.classifiers.BinaryTreeNumericAttributeClassObserver.java

License:Open Source License

/**
 * Records one (attribute value, class, weight) observation in the binary tree.
 * Missing attribute values carry no information and are ignored.
 *
 * @param attVal the observed attribute value
 * @param classVal the class index of the observation
 * @param weight the weight of the observation
 */
public void observeAttributeClass(double attVal, int classVal, double weight) {
    // Guard clause replaces the original empty if-branch.
    if (Utils.isMissingValue(attVal)) {
        return;
    }
    if (this.root == null) {
        // First observation seeds the tree.
        this.root = new Node(attVal, classVal, weight);
    } else {
        this.root.insertValue(attVal, classVal, weight);
    }
}

From source file:moa.classifiers.core.attributeclassobservers.GaussianNumericAttributeClassObserver.java

License:Open Source License

/**
 * Feeds one (attribute value, class, weight) observation into the per-class
 * Gaussian estimator, tracking the observed min/max per class.
 * Missing attribute values are ignored.
 *
 * @param attVal the observed attribute value
 * @param classVal the class index of the observation
 * @param weight the weight of the observation
 */
@Override
public void observeAttributeClass(double attVal, int classVal, double weight) {
    // Guard clause replaces the original empty if-branch.
    if (Utils.isMissingValue(attVal)) {
        return;
    }
    GaussianEstimator valDist = this.attValDistPerClass.get(classVal);
    if (valDist == null) {
        // First observation for this class: create the estimator and
        // initialize both extremes to the observed value.
        valDist = new GaussianEstimator();
        this.attValDistPerClass.set(classVal, valDist);
        this.minValueObservedPerClass.setValue(classVal, attVal);
        this.maxValueObservedPerClass.setValue(classVal, attVal);
    } else {
        // Update the per-class extremes.
        if (attVal < this.minValueObservedPerClass.getValue(classVal)) {
            this.minValueObservedPerClass.setValue(classVal, attVal);
        }
        if (attVal > this.maxValueObservedPerClass.getValue(classVal)) {
            this.maxValueObservedPerClass.setValue(classVal, attVal);
        }
    }
    valDist.addObservation(attVal, weight);
}

From source file:moa.classifiers.core.attributeclassobservers.GHNumericAttributeClassObserver.java

License:Open Source License

/**
 * Feeds one (attribute value, class, weight) observation into the per-class
 * Gaussian estimator, tracking the observed min/max per class.
 * Missing attribute values are ignored.
 *
 * @param attVal the observed attribute value
 * @param classVal the class index of the observation
 * @param weight the weight of the observation
 */
@Override
public void observeAttributeClass(double attVal, int classVal, double weight) {
    // Guard clause replaces the original empty if-branch.
    if (Utils.isMissingValue(attVal)) {
        return;
    }

    // Object observing the distribution of this attribute's values for classVal.
    GaussianEstimator valDist = this.attValDistPerClass.get(classVal);

    if (valDist == null) {
        // If attribute not previously observed, initialize the observer and
        // register it in the vector of all attribute observers.
        valDist = new GaussianEstimator();
        this.attValDistPerClass.set(classVal, valDist);

        // First observation initializes both extremes for this class.
        this.minValueObservedPerClass.setValue(classVal, attVal);
        this.maxValueObservedPerClass.setValue(classVal, attVal);
    } else {
        // Update minimum and maximum values observed.
        if (attVal < this.minValueObservedPerClass.getValue(classVal)) {
            this.minValueObservedPerClass.setValue(classVal, attVal);
        }
        if (attVal > this.maxValueObservedPerClass.getValue(classVal)) {
            this.maxValueObservedPerClass.setValue(classVal, attVal);
        }
    }

    // Pass the observation to the attribute observer so it can update its
    // internal statistics (mean, variance, etc.).
    valDist.addObservation(attVal, weight);
}

From source file:moa.classifiers.core.attributeclassobservers.GreenwaldKhannaNumericAttributeClassObserver.java

License:Open Source License

/**
 * Inserts one attribute value into the Greenwald-Khanna quantile summary for
 * the given class. Missing attribute values are ignored.
 *
 * @param attVal the observed attribute value
 * @param classVal the class index of the observation
 * @param weight the weight of the observation (currently NOT taken into account)
 */
@Override
public void observeAttributeClass(double attVal, int classVal, double weight) {
    // Guard clause replaces the original empty if-branch.
    if (Utils.isMissingValue(attVal)) {
        return;
    }
    GreenwaldKhannaQuantileSummary valDist = this.attValDistPerClass.get(classVal);
    if (valDist == null) {
        // Lazily create the per-class summary with the configured tuple budget.
        valDist = new GreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue());
        this.attValDistPerClass.set(classVal, valDist);
    }
    // TODO: not taking weight into account
    valDist.insert(attVal);
}