Example usage for weka.core Utils missingValue

List of usage examples for weka.core Utils missingValue

Introduction

On this page you can find example usages of weka.core.Utils.missingValue().

Prototype

public static double missingValue() 

Source Link

Document

Returns the value used to code a missing value.

Usage

From source file:Bilbo.java

License:Open Source License

/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @param p_unlabeledData the unlabeled data; a copy of it is kept by the classifier
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Has user asked to represent copies using weights?
    if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) {
        throw new IllegalArgumentException("Cannot represent copies using weights when "
                + "base learner in bagging does not implement " + "WeightedInstancesHandler.");
    }

    // work on private copies so the caller's datasets are never modified
    m_data = new Instances(data);
    m_unlabeledData = new Instances(p_unlabeledData);

    super.buildClassifier(m_data);

    // out-of-bag estimation is only accepted with a bag size of exactly 100%
    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    m_random = new Random(m_Seed);

    // one in-bag mask per ensemble member; only the outer array is allocated here
    // NOTE(review): the per-member masks are presumably filled in by buildClassifiers() - confirm
    m_inBag = null;
    if (m_CalcOutOfBag)
        m_inBag = new boolean[m_Classifiers.length][];

    // give every randomizable ensemble member its own seed
    for (int j = 0; j < m_Classifiers.length; j++) {
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt());
        }
    }
    // TODO: insert oracle loop here

    buildClassifiers();

    // per-tree post-processing: the same Instances object is cleared and reused
    // as a buffer for each ensemble member
    Instances inst = new Instances(m_data);
    for (int i = 0; i < m_Classifiers.length; i++) {
        inst.clear();
        ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst);
        ((NewTree) m_Classifiers[i]).DoInduction(inst);
        // TODO: nothing else is done with the result yet
    }
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = m_data.classAttribute().isNumeric();

        for (int i = 0; i < m_data.numInstances(); i++) {
            double vote;
            double[] votes;
            // numeric class: a single accumulator; nominal class: one slot per class value
            if (numeric)
                votes = new double[1];
            else
                votes = new double[m_data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                // only members that did NOT train on instance i may vote
                if (m_inBag[j][i])
                    continue;

                if (numeric) {
                    double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i));
                    // missing predictions do not take part in the average
                    if (!Utils.isMissingValue(pred)) {
                        votes[0] += pred;
                        voteCount++;
                    }
                } else {
                    voteCount++;
                    double[] newProbs = ((NewTree) m_Classifiers[j])
                            .distributionForInstance(m_data.instance(i));
                    // sum the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                if (voteCount == 0) {
                    // no out-of-bag member predicted this instance
                    vote = Utils.missingValue();
                } else {
                    vote = votes[0] / voteCount; // average
                }
            } else {
                if (Utils.eq(Utils.sum(votes), 0)) {
                    // no probability mass collected: no prediction
                    vote = Utils.missingValue();
                } else {
                    vote = Utils.maxIndex(votes); // predicted class
                    // the normalized votes are not read again after this point
                    Utils.normalize(votes);
                }
            }

            // error for instance
            if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) {
                outOfBagCount += m_data.instance(i).weight();
                if (numeric) {
                    // weighted absolute error divided by the actual class value
                    // NOTE(review): a class value of 0 makes this Infinity/NaN and a
                    // negative class value makes the contribution negative - confirm intent
                    errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue())
                            * m_data.instance(i).weight()) / m_data.instance(i).classValue();
                } else {
                    // weighted misclassification count
                    if (vote != m_data.instance(i).classValue())
                        errorSum += m_data.instance(i).weight();
                }
            }
        }

        // m_OutOfBagError is left at its previous value if nothing could be evaluated
        if (outOfBagCount > 0) {
            m_OutOfBagError = errorSum / outOfBagCount;
        }
    } else {
        m_OutOfBagError = 0;
    }

    // save memory (only the labeled copy is released; m_unlabeledData is kept)
    m_data = null;
}

From source file:Bilbo.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance./*from  w  ww  .  j  a  v a  2 s .c om*/
 *
 * @param instance the instance to be classified
 * @return preedicted class probability distribution
 * @throws Exception if distribution can't be computed successfully 
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    double[] sums = new double[instance.numClasses()], newProbs;

    double numPreds = 0;
    for (int i = 0; i < m_NumIterations; i++) {
        if (instance.classAttribute().isNumeric() == true) {
            double pred = ((NewTree) m_Classifiers[i]).classifyInstance(instance);
            if (!Utils.isMissingValue(pred)) {
                sums[0] += pred;
                numPreds++;
            }
        } else {
            newProbs = ((NewTree) m_Classifiers[i]).distributionForInstance(instance);
            for (int j = 0; j < newProbs.length; j++)
                sums[j] += newProbs[j];
        }
    }
    if (instance.classAttribute().isNumeric() == true) {
        if (numPreds == 0) {
            sums[0] = Utils.missingValue();
        } else {
            sums[0] /= numPreds;
        }
        return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
        return sums;
    } else {
        Utils.normalize(sums);
        return sums;
    }
}

From source file:adams.data.conversion.AbstractMatchWekaInstanceAgainstHeader.java

License:Open Source License

/**
 * Matches the input instance against the header.
 *
 * @param input   the Instance to align to the header
 * @return      the aligned Instance/*  w  w w.jav  a2s  .  c  o m*/
 */
protected Instance match(Instance input) {
    Instance result;
    double[] values;
    int i;

    values = new double[m_Dataset.numAttributes()];
    for (i = 0; i < m_Dataset.numAttributes(); i++) {
        values[i] = Utils.missingValue();
        switch (m_Dataset.attribute(i).type()) {
        case Attribute.NUMERIC:
        case Attribute.DATE:
            values[i] = input.value(i);
            break;
        case Attribute.NOMINAL:
            if (m_Dataset.attribute(i).indexOfValue(input.stringValue(i)) != -1)
                values[i] = m_Dataset.attribute(i).indexOfValue(input.stringValue(i));
            break;
        case Attribute.STRING:
            values[i] = m_Dataset.attribute(i).addStringValue(input.stringValue(i));
            break;
        case Attribute.RELATIONAL:
            values[i] = m_Dataset.attribute(i).addRelation(input.relationalValue(i));
            break;
        default:
            throw new IllegalStateException(
                    "Unhandled attribute type: " + Attribute.typeToString(m_Dataset.attribute(i).type()));
        }
    }

    if (input instanceof SparseInstance)
        result = new SparseInstance(input.weight(), values);
    else
        result = new DenseInstance(input.weight(), values);
    result.setDataset(m_Dataset);

    // fix class index, if necessary
    if ((input.classIndex() != m_Dataset.classIndex()) && (m_Dataset.classIndex() < 0))
        m_Dataset.setClassIndex(input.classIndex());

    return result;
}

From source file:adams.data.featureconverter.Weka.java

License:Open Source License

/**
 * Performs the actual generation of a row from the raw data.
 * /*from  w w w .  j  ava  2 s.c  om*/
 * @param data   the data of the row, elements can be null (= missing)
 * @return      the dataset structure
 */
@Override
protected Instance doGenerateRow(List<Object> data) {
    Instance result;
    int i;
    Object obj;
    double[] values;

    values = new double[m_Header.numAttributes()];

    for (i = 0; i < data.size(); i++) {
        obj = data.get(i);
        if (obj == null) {
            values[i] = Utils.missingValue();
            continue;
        }
        switch (m_HeaderDefinition.getType(i)) {
        case BOOLEAN:
            values[i] = ((Boolean) obj) ? 0.0 : 1.0;
            break;
        case NUMERIC:
            values[i] = ((Number) obj).doubleValue();
            break;
        case STRING:
        case UNKNOWN:
            values[i] = m_Header.attribute(i).addStringValue(obj.toString());
            break;
        }
    }

    result = new DenseInstance(1.0, values);
    result.setDataset(m_Header);

    return result;
}

From source file:adams.flow.container.WekaPredictionContainer.java

License:Open Source License

/**
 * Initializes the container.
 * <br><br>
 * Delegates to the three-argument constructor with a null first argument,
 * the missing-value code and an empty double array.
 * <br><br>
 * Only used for generating help information.
 */
public WekaPredictionContainer() {
    this(null, Utils.missingValue(), new double[0]);
}

From source file:adams.gui.visualization.instances.InstancesTableModel.java

License:Open Source License

/**
 * Inserts a new instance into the data. String and relational attributes of
 * the new instance are set to missing, all other attributes start at 0.0.
 *
 * @param index the position to insert the instance at; a negative value
 *              appends it at the end
 * @param notify whether to notify the listeners
 */
public void insertInstance(int index, boolean notify) {
    if (!m_IgnoreChanges)
        addUndoPoint();
    double[] vals = new double[m_Data.numAttributes()];

    // set any string or relational attribute values to missing
    // in the new instance, just in case this is the very first
    // instance in the dataset.
    for (int i = 0; i < m_Data.numAttributes(); i++) {
        if (m_Data.attribute(i).isString() || m_Data.attribute(i).isRelationValued()) {
            vals[i] = Utils.missingValue();
        }
    }
    Instance toAdd = new DenseInstance(1.0, vals);

    // remember where the instance really ended up, so that the event below
    // reports the inserted row and not always the last one
    int insertedRow;
    if (index < 0) {
        m_Data.add(toAdd);
        insertedRow = m_Data.numInstances() - 1;
    } else {
        m_Data.add(index, toAdd);
        insertedRow = index;
    }
    if (notify) {
        notifyListener(new TableModelEvent(this, insertedRow, insertedRow,
                TableModelEvent.ALL_COLUMNS, TableModelEvent.INSERT));
    }
}

From source file:adams.gui.visualization.instances.InstancesTableModel.java

License:Open Source License

/**
 * Sets the value of the cell at rowIndex and columnIndex to aValue. The
 * operation is ignored if the string representation of the value hasn't
 * changed. Values that cannot be parsed or that are not valid for the
 * attribute (e.g., an unknown nominal label) leave the cell untouched.
 *
 * @param aValue the new value, null sets the cell to missing
 * @param rowIndex the row index
 * @param columnIndex the column index
 * @param notify whether to notify the listeners
 * @throws IllegalArgumentException if the attribute type of the cell is not supported
 */
public void setValueAt(Object aValue, int rowIndex, int columnIndex, boolean notify) {
    // leading columns that do not map to attributes: always one, plus the
    // optional weights column
    int offset = 1;
    if (m_ShowWeightsColumn)
        offset++;

    // compare string representations; nothing to do if they are the same
    Object oldValue = getValueAt(rowIndex, columnIndex);
    if (("" + oldValue).equals("" + aValue))
        return;

    if (!m_IgnoreChanges)
        addUndoPoint();

    int type = getType(rowIndex, columnIndex);
    int index = columnIndex - offset;
    Instance inst = m_Data.instance(rowIndex);
    Attribute att = inst.attribute(index);

    // missing?
    if (aValue == null) {
        inst.setValue(index, Utils.missingValue());
    } else {
        String tmp = aValue.toString();

        switch (type) {
        case Attribute.DATE:
            try {
                // parse once; an unparseable date leaves the cell unchanged
                inst.setValue(index, att.parseDate(tmp));
            } catch (Exception ignored) {
                // deliberately ignored: invalid input must not modify the data
            }
            break;

        case Attribute.NOMINAL:
            // only labels known to the attribute are accepted
            if (att.indexOfValue(tmp) > -1)
                inst.setValue(index, att.indexOfValue(tmp));
            break;

        case Attribute.STRING:
            inst.setValue(index, tmp);
            break;

        case Attribute.NUMERIC:
            try {
                inst.setValue(index, Double.parseDouble(tmp));
            } catch (Exception ignored) {
                // deliberately ignored: invalid input must not modify the data
            }
            break;

        case Attribute.RELATIONAL:
            try {
                inst.setValue(index, att.addRelation((Instances) aValue));
            } catch (Exception ignored) {
                // deliberately ignored: invalid input must not modify the data
            }
            break;

        default:
            throw new IllegalArgumentException("Unsupported Attribute type: " + type + "!");
        }
    }

    // listeners are only reached if the value representation differed (see early return)
    if (notify)
        notifyListener(new TableModelEvent(this, rowIndex, columnIndex));
}

From source file:adams.ml.data.WekaConverter.java

License:Open Source License

/**
 * Turns an ADAMS dataset row into a Weka Instance.
 * <br><br>
 * Cells that are absent or missing, and nominal cells whose content is not a
 * label of the corresponding attribute, result in missing values.
 *
 * @param data   the dataset to use as template
 * @param row      the row to convert
 * @return      the generated instance
 * @throws Exception   if conversion fails, e.g., due to an unhandled attribute type
 */
public static Instance toInstance(Instances data, Row row) throws Exception {
    Instance result;
    double[] values;
    int i;
    int labelIndex;
    Cell cell;
    Attribute att;

    values = new double[data.numAttributes()];
    for (i = 0; i < data.numAttributes(); i++) {
        // default to missing; only overwritten if the cell holds a usable value
        values[i] = Utils.missingValue();

        if (!row.hasCell(i))
            continue;
        cell = row.getCell(i);
        if (cell.isMissing())
            continue;

        att = data.attribute(i);
        switch (att.type()) {
        case Attribute.NUMERIC:
            values[i] = cell.toDouble();
            break;
        case Attribute.DATE:
            values[i] = cell.toAnyDateType().getTime();
            break;
        case Attribute.NOMINAL:
            // -1 signals an unknown label and is not a valid nominal value,
            // so the value is left missing instead of storing the -1
            labelIndex = att.indexOfValue(cell.getContent());
            if (labelIndex != -1)
                values[i] = labelIndex;
            break;
        case Attribute.STRING:
            values[i] = att.addStringValue(cell.getContent());
            break;
        default:
            throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(att));
        }
    }

    result = new DenseInstance(1.0, values);
    result.setDataset(data);

    return result;
}

From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java

License:Open Source License

/**
 * Generates the clusterer: picks the initial centroids at random from the
 * distinct instances and then alternates between assigning instances to
 * their nearest centroid and recomputing the centroids, until the
 * assignments no longer change or the maximum number of iterations is
 * reached. Clusters that end up empty are dropped.
 *
 * @param data the instances to be clustered
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    // statistics over the full dataset
    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                // every value is missing: mark the mean as missing
                m_FullMeansOrMediansOrModes[i] = Double.NaN;
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                // "missing" is more frequent than any label: mark it as the mode
                m_FullMeansOrMediansOrModes[i] = -1;
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder)
        m_Assignments = clusterAssignments;

    m_DistanceFunction.setInstances(instances);

    Random random = new Random(getSeed());
    int instIndex;
    // only the keys matter: remembers which instances already serve as centroids
    HashMap<DecisionTableHashKey, Object> initC = new HashMap<DecisionTableHashKey, Object>();
    DecisionTableHashKey hk = null;

    // the selection below swaps instances around, so work on a copy if the
    // original order has to be preserved
    Instances initInstances = null;
    if (m_PreserveOrder)
        initInstances = new Instances(instances);
    else
        initInstances = instances;

    // draw distinct instances at random as initial centroids
    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = random.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        // move the drawn instance out of the range still being sampled
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    // there may be fewer distinct instances than requested clusters
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        // assignment step: any change of assignment means another round is needed
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        // The iteration limit must be evaluated BEFORE the empty-cluster
        // handling: if this is the final round, the non-empty clusters have
        // to be kept (compacted) for the statistics computed after the loop.
        // Deciding this afterwards would leave tempI filled with nulls.
        if (m_Iterations == m_MaxIterations)
            converged = true;

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                // final round: keep only the non-empty clusters
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                // another round follows and refills the array
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // per-cluster sizes and, if requested, standard deviations
    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    // no standard deviation for non-numeric attributes
                    vals2[j] = Utils.missingValue();
                }
            }
            m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}

From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java

License:Open Source License

/**
 * Computes the centroid of the given members: the mean or mode for the
 * Euclidean distance and for nominal attributes, the median for numeric
 * attributes under the Manhattan distance.
 *
 * @param centroidIndex the index of the cluster whose statistics get updated
 * @param members the instances that make up the cluster
 * @param updateClusterInfo whether to record the per-cluster counts and
 *        append the centroid to the list of cluster centroids
 * @return the attribute values of the centroid
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    boolean euclidean = m_DistanceFunction instanceof EuclideanDistance;
    boolean manhattan = m_DistanceFunction instanceof ManhattanDistance;

    // the following are used only for the Manhattan distance (median computation)
    Instances sorted = null;
    int medianPos = 0;
    boolean evenCount = false;
    if (manhattan) {
        medianPos = (members.numInstances() - 1) / 2;
        evenCount = ((members.numInstances() % 2) == 0);
        sorted = m_PreserveOrder ? members : new Instances(members);
    }

    for (int attIdx = 0; attIdx < members.numAttributes(); attIdx++) {

        // Euclidean distance: centroid is the mean point;
        // Manhattan distance: centroid is the median point;
        // nominal attributes always use the mode
        if (euclidean || members.attribute(attIdx).isNominal()) {
            vals[attIdx] = members.meanOrMode(attIdx);
        } else if (manhattan) {
            if (members.numInstances() == 1) {
                // singleton special case
                vals[attIdx] = members.instance(0).value(attIdx);
            } else {
                sorted.kthSmallestValue(attIdx, medianPos + 1);
                vals[attIdx] = sorted.instance(medianPos).value(attIdx);
                if (evenCount) {
                    // even number of members: average the two middle values
                    sorted.kthSmallestValue(attIdx, medianPos + 2);
                    vals[attIdx] = (vals[attIdx] + sorted.instance(medianPos + 1).value(attIdx)) / 2;
                }
            }
        }

        if (!updateClusterInfo) {
            continue;
        }

        m_ClusterMissingCounts[centroidIndex][attIdx] = members.attributeStats(attIdx).missingCount;
        m_ClusterNominalCounts[centroidIndex][attIdx] = members.attributeStats(attIdx).nominalCounts;

        int missingCount = m_ClusterMissingCounts[centroidIndex][attIdx];
        if (members.attribute(attIdx).isNominal()) {
            int[] labelCounts = m_ClusterNominalCounts[centroidIndex][attIdx];
            if (missingCount > labelCounts[Utils.maxIndex(labelCounts)]) {
                vals[attIdx] = Utils.missingValue(); // mark mode as missing
            }
        } else if (missingCount == members.numInstances()) {
            vals[attIdx] = Utils.missingValue(); // mark mean as missing
        }
    }

    if (updateClusterInfo) {
        m_ClusterCentroids.add(new DenseInstance(1.0, vals));
    }
    return vals;
}