Example usage for weka.filters.unsupervised.attribute ReplaceMissingValues setInputFormat

List of usage examples for weka.filters.unsupervised.attribute ReplaceMissingValues setInputFormat

Introduction

In this page you can find the example usage for weka.filters.unsupervised.attribute ReplaceMissingValues setInputFormat.

Prototype

public boolean setInputFormat(Instances instanceInfo) throws Exception 

Source Link

Document

Sets the format of the input instances.

Usage

From source file:com.actelion.research.orbit.imageAnalysis.models.OrbitModel.java

License:Open Source License

/**
 * convert models from old weka version//from  www . j  a  v  a  2 s .com
 *
 * @param model
 */
public static void fixOldModelVersion(final OrbitModel model) {
    if (model == null)
        return; // nothing to fix
    boolean oldWekaVersion = false;
    try {
        model.getStructure().classAttribute().numValues();
    } catch (NullPointerException ne) {
        oldWekaVersion = true;
    }

    // apply old model fix?
    if (oldWekaVersion) {
        logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes");
        int numClasses = model.getClassShapes().size();
        TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null);
        int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1;
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures);
        for (int a = 0; a < numFeatures - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            attrInfo.add(attr);
        }
        List<String> classValues = new ArrayList<String>(numClasses);
        for (int i = 0; i < numClasses; i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        Attribute classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);

        Instances structure = new Instances("trainSet pattern classes", attrInfo, 0);
        structure.setClassIndex(numFeatures - 1);
        model.setStructure(structure);

        try {
            if (model.getClassifier() != null && model.getClassifier().getClassifier() != null
                    && model.getClassifier().getClassifier() instanceof SMO) {
                SMO smo = ((SMO) model.getClassifier().getClassifier());

                Field field = smo.getClass().getDeclaredField("m_classAttribute");
                field.setAccessible(true);
                field.set(smo, classAttr);

                // missing values
                ReplaceMissingValues rmv = new ReplaceMissingValues();
                rmv.setInputFormat(structure);

                Field missing = smo.getClass().getDeclaredField("m_Missing");
                missing.setAccessible(true);
                missing.set(smo, rmv);

                // filter
                Field filter = smo.getClass().getDeclaredField("m_Filter");
                filter.setAccessible(true);
                Filter normalize = (Filter) filter.get(smo);

                RelationalLocator relLoc = new RelationalLocator(structure);
                StringLocator strLoc = new StringLocator(structure);

                Field outputRelAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputRelAtts");
                outputRelAtts.setAccessible(true);
                outputRelAtts.set(normalize, relLoc);

                Field inputRelAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_InputRelAtts");
                inputRelAtts.setAccessible(true);
                inputRelAtts.set(normalize, relLoc);

                Field outputStrAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputStringAtts");
                outputStrAtts.setAccessible(true);
                outputStrAtts.set(normalize, strLoc);

                Field inputStrAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_InputStringAtts");
                inputStrAtts.setAccessible(true);
                inputStrAtts.set(normalize, strLoc);

                Field outputFormat = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputFormat");
                outputFormat.setAccessible(true);
                outputFormat.set(normalize, structure);

                logger.info("fixes applied, the model should work with a weka version >= 3.7.11 now");
            } // else: good luck...
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("new weka version fixes could not be applied: " + e.getMessage());
        }
    } // old weka version
    fixOldModelVersion(model.getSegmentationModel()); // fixOldModelVersion can handle null
    fixOldModelVersion(model.getSecondarySegmentationModel()); // fixOldModelVersion can handle null
    fixOldModelVersion(model.getExclusionModel()); // fixOldModelVersion can handle null
}

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Initializes an information gain attribute evaluator. Replaces missing
 * values with means/modes; Deletes instances with missing class values.
 * //from www.j  av  a  2 s .c om
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (wighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:org.opentox.jaqpot3.qsar.predictor.MissingValueFilterPredictor.java

License:Open Source License

@Override
public Instances predict(Instances data) throws JaqpotException {
    HashSet<String> ignoredUris = (HashSet<String>) model.getActualModel().getSerializableActualModel();
    for (String attribute2Bignored : ignoredUris) {
        Attribute attr = data.attribute(attribute2Bignored);
        if (attr != null) {
            data.deleteAttributeAt(attr.index());
        }//ww  w  . ja  va  2 s.  c o m
    }
    updateFeatureMap(model);
    weka.filters.unsupervised.attribute.ReplaceMissingValues replacer = new ReplaceMissingValues();

    try {
        replacer.setInputFormat(data);
    } catch (Exception ex) {
        Logger.getLogger(MissingValueFilterPredictor.class.getName()).log(Level.SEVERE, null, ex);
        throw new JaqpotException(ex);
    }

    Iterator<String> features = featureToMVH.keySet().iterator();
    String nextFeature = null;
    Attribute currentAttribute = null;
    while (features.hasNext()) {
        nextFeature = features.next();
        currentAttribute = data.attribute(nextFeature);
        if (currentAttribute == null) {
            throw new JaqpotException(
                    "The dataset you provided does not contain the necessary " + "feature : " + nextFeature);
        }
        data.renameAttribute(currentAttribute, featureToMVH.get(nextFeature));
    }

    return data;
}

From source file:org.opentox.jaqpot3.qsar.util.SimpleMVHFilter.java

License:Open Source License

public Instances filter(Instances data) throws JaqpotException {

    ReplaceMissingValues replacer = new ReplaceMissingValues();
    try {//www  .j a  va  2s.c om
        replacer.setInputFormat(data);
        replacer.setIgnoreClass(ignoreClass);
        Instances filtered_data = ReplaceMissingValues.useFilter(data, replacer);
        return filtered_data;
    } catch (Exception ex) {
        throw new JaqpotException("Cannot apply missing values filtering", ex);
    }
}

From source file:org.opentox.qsar.processors.filters.SimpleMVHFilter.java

License:Open Source License

@Override
public Instances filter(Instances data) throws QSARException {
    ReplaceMissingValues replacer = new ReplaceMissingValues();
    try {/*ww w  .j av  a  2s.c  om*/
        replacer.setInputFormat(data);
        return ReplaceMissingValues.useFilter(data, replacer);
    } catch (Exception ex) {
        String message = "Cannot apply missing values filtering";
        throw new QSARException(Cause.XQF412, message, ex);
    }

}

From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.PrepareArrayDataModule.java

License:Open Source License

/**
 * Describe <code>replaceMissingValues</code> method is given a WEKA Instances object corresponding to
 * the gene expression data//from w w w  . j  av  a  2s  .c o  m
 * and returns a new WEKA Instances object with missing values replaced, if any
 *
 * @param data an <code>Instances</code> which is a WEKA Instances object corresponding to the gene expression data
 * @return an <code>Instances</code> corresponding to a new WEKA Instances object with missing values replaced, if any
 * @exception NullArgumentException if an error occurs if the given data is null
 */
public static Instances replaceMissingValues(Instances data) throws NullArgumentException {
    if (data == null) {
        throw new NullArgumentException("replace values passed to discretize method is null!");
    }
    try {
        ReplaceMissingValues replaceMissingValues = new ReplaceMissingValues();
        replaceMissingValues.setInputFormat(data);
        Instances newData = Filter.useFilter(data, replaceMissingValues);
        return newData;
    } catch (Exception e) {
        System.out.println(e);
        e.printStackTrace();
    }
    return null;
}