List of usage examples for weka.filters.unsupervised.attribute ReplaceMissingValues setInputFormat
public boolean setInputFormat(Instances instanceInfo) throws Exception
From source file:com.actelion.research.orbit.imageAnalysis.models.OrbitModel.java
License:Open Source License
/** * convert models from old weka version//from www . j a v a 2 s .com * * @param model */ public static void fixOldModelVersion(final OrbitModel model) { if (model == null) return; // nothing to fix boolean oldWekaVersion = false; try { model.getStructure().classAttribute().numValues(); } catch (NullPointerException ne) { oldWekaVersion = true; } // apply old model fix? if (oldWekaVersion) { logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes"); int numClasses = model.getClassShapes().size(); TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null); int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1; ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures); for (int a = 0; a < numFeatures - 1; a++) { Attribute attr = new Attribute("a" + a); attrInfo.add(attr); } List<String> classValues = new ArrayList<String>(numClasses); for (int i = 0; i < numClasses; i++) { classValues.add((i + 1) + ".0"); // "1.0", "2.0", ... 
} Attribute classAttr = new Attribute("class", classValues); attrInfo.add(classAttr); Instances structure = new Instances("trainSet pattern classes", attrInfo, 0); structure.setClassIndex(numFeatures - 1); model.setStructure(structure); try { if (model.getClassifier() != null && model.getClassifier().getClassifier() != null && model.getClassifier().getClassifier() instanceof SMO) { SMO smo = ((SMO) model.getClassifier().getClassifier()); Field field = smo.getClass().getDeclaredField("m_classAttribute"); field.setAccessible(true); field.set(smo, classAttr); // missing values ReplaceMissingValues rmv = new ReplaceMissingValues(); rmv.setInputFormat(structure); Field missing = smo.getClass().getDeclaredField("m_Missing"); missing.setAccessible(true); missing.set(smo, rmv); // filter Field filter = smo.getClass().getDeclaredField("m_Filter"); filter.setAccessible(true); Filter normalize = (Filter) filter.get(smo); RelationalLocator relLoc = new RelationalLocator(structure); StringLocator strLoc = new StringLocator(structure); Field outputRelAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_OutputRelAtts"); outputRelAtts.setAccessible(true); outputRelAtts.set(normalize, relLoc); Field inputRelAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_InputRelAtts"); inputRelAtts.setAccessible(true); inputRelAtts.set(normalize, relLoc); Field outputStrAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_OutputStringAtts"); outputStrAtts.setAccessible(true); outputStrAtts.set(normalize, strLoc); Field inputStrAtts = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_InputStringAtts"); inputStrAtts.setAccessible(true); inputStrAtts.set(normalize, strLoc); Field outputFormat = normalize.getClass().getSuperclass().getSuperclass() .getDeclaredField("m_OutputFormat"); outputFormat.setAccessible(true); outputFormat.set(normalize, structure); logger.info("fixes applied, the 
model should work with a weka version >= 3.7.11 now"); } // else: good luck... } catch (Exception e) { e.printStackTrace(); logger.error("new weka version fixes could not be applied: " + e.getMessage()); } } // old weka version fixOldModelVersion(model.getSegmentationModel()); // fixOldModelVersion can handle null fixOldModelVersion(model.getSecondarySegmentationModel()); // fixOldModelVersion can handle null fixOldModelVersion(model.getExclusionModel()); // fixOldModelVersion can handle null }
From source file:ml.dataprocess.CorrelationAttributeEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Replaces missing * values with means/modes; Deletes instances with missing class values. * //from www.j av a 2 s .c om * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ @Override public void buildEvaluator(Instances data) throws Exception { data = new Instances(data); data.deleteWithMissingClass(); ReplaceMissingValues rmv = new ReplaceMissingValues(); rmv.setInputFormat(data); data = Filter.useFilter(data, rmv); int numClasses = data.classAttribute().numValues(); int classIndex = data.classIndex(); int numInstances = data.numInstances(); m_correlations = new double[data.numAttributes()]; /* * boolean hasNominals = false; boolean hasNumerics = false; */ List<Integer> numericIndexes = new ArrayList<Integer>(); List<Integer> nominalIndexes = new ArrayList<Integer>(); if (m_detailedOutput) { m_detailedOutputBuff = new StringBuffer(); } // TODO for instance weights (folded into computing weighted correlations) // add another dimension just before the last [2] (0 for 0/1 binary vector // and // 1 for corresponding instance weights for the 1's) double[][][] nomAtts = new double[data.numAttributes()][][]; for (int i = 0; i < data.numAttributes(); i++) { if (data.attribute(i).isNominal() && i != classIndex) { nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()]; Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all // 1's nominalIndexes.add(i); } else if (data.attribute(i).isNumeric() && i != classIndex) { numericIndexes.add(i); } } // do the nominal attributes if (nominalIndexes.size() > 0) { for (int i = 0; i < data.numInstances(); i++) { Instance current = data.instance(i); for (int j = 0; j < current.numValues(); j++) { if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) { // Will need to check for zero in case this isn't a sparse // instance (unless we add 
1 and subtract 1) nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1; nomAtts[current.index(j)][0][i] -= 1; } } } } if (data.classAttribute().isNumeric()) { double[] classVals = data.attributeToDoubleArray(classIndex); // do the numeric attributes for (Integer i : numericIndexes) { double[] numAttVals = data.attributeToDoubleArray(i); m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length); if (m_correlations[i] == 1.0) { // check for zero variance (useless numeric attribute) if (Utils.variance(numAttVals) == 0) { m_correlations[i] = 0; } } } // do the nominal attributes if (nominalIndexes.size() > 0) { // now compute the correlations for the binarized nominal attributes for (Integer i : nominalIndexes) { double sum = 0; double corr = 0; double sumCorr = 0; double sumForValue = 0; if (m_detailedOutput) { m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name()); } for (int j = 0; j < data.attribute(i).numValues(); j++) { sumForValue = Utils.sum(nomAtts[i][j]); corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length); // useless attribute - all instances have the same value if (sumForValue == numInstances || sumForValue == 0) { corr = 0; } if (corr < 0.0) { corr = -corr; } sumCorr += sumForValue * corr; sum += sumForValue; if (m_detailedOutput) { m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": "); m_detailedOutputBuff.append(Utils.doubleToString(corr, 6)); } } m_correlations[i] = (sum > 0) ? 
sumCorr / sum : 0; } } } else { // class is nominal // TODO extra dimension for storing instance weights too double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()]; // this is equal to the number of instances for all inst weights = 1 double[] classValCounts = new double[data.classAttribute().numValues()]; for (int i = 0; i < data.numInstances(); i++) { Instance current = data.instance(i); binarizedClasses[(int) current.classValue()][i] = 1; } for (int i = 0; i < data.classAttribute().numValues(); i++) { classValCounts[i] = Utils.sum(binarizedClasses[i]); } double sumClass = Utils.sum(classValCounts); // do numeric attributes first if (numericIndexes.size() > 0) { for (Integer i : numericIndexes) { double[] numAttVals = data.attributeToDoubleArray(i); double corr = 0; double sumCorr = 0; for (int j = 0; j < data.classAttribute().numValues(); j++) { corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length); if (corr < 0.0) { corr = -corr; } if (corr == 1.0) { // check for zero variance (useless numeric attribute) if (Utils.variance(numAttVals) == 0) { corr = 0; } } sumCorr += classValCounts[j] * corr; } m_correlations[i] = sumCorr / sumClass; } } if (nominalIndexes.size() > 0) { for (Integer i : nominalIndexes) { if (m_detailedOutput) { m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name()); } double sumForAtt = 0; double corrForAtt = 0; for (int j = 0; j < data.attribute(i).numValues(); j++) { double sumForValue = Utils.sum(nomAtts[i][j]); double corr = 0; double sumCorr = 0; double avgCorrForValue = 0; sumForAtt += sumForValue; for (int k = 0; k < numClasses; k++) { // corr between value j and class k corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k], binarizedClasses[k].length); // useless attribute - all instances have the same value if (sumForValue == numInstances || sumForValue == 0) { corr = 0; } if (corr < 0.0) { corr = -corr; } sumCorr += classValCounts[k] * corr; } 
avgCorrForValue = sumCorr / sumClass; corrForAtt += sumForValue * avgCorrForValue; if (m_detailedOutput) { m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": "); m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6)); } } // the weighted average corr for att i as // a whole (wighted by value frequencies) m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0; } } } if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) { m_detailedOutputBuff.append("\n"); } }
From source file:org.opentox.jaqpot3.qsar.predictor.MissingValueFilterPredictor.java
License:Open Source License
@Override public Instances predict(Instances data) throws JaqpotException { HashSet<String> ignoredUris = (HashSet<String>) model.getActualModel().getSerializableActualModel(); for (String attribute2Bignored : ignoredUris) { Attribute attr = data.attribute(attribute2Bignored); if (attr != null) { data.deleteAttributeAt(attr.index()); }//ww w . ja va 2 s. c o m } updateFeatureMap(model); weka.filters.unsupervised.attribute.ReplaceMissingValues replacer = new ReplaceMissingValues(); try { replacer.setInputFormat(data); } catch (Exception ex) { Logger.getLogger(MissingValueFilterPredictor.class.getName()).log(Level.SEVERE, null, ex); throw new JaqpotException(ex); } Iterator<String> features = featureToMVH.keySet().iterator(); String nextFeature = null; Attribute currentAttribute = null; while (features.hasNext()) { nextFeature = features.next(); currentAttribute = data.attribute(nextFeature); if (currentAttribute == null) { throw new JaqpotException( "The dataset you provided does not contain the necessary " + "feature : " + nextFeature); } data.renameAttribute(currentAttribute, featureToMVH.get(nextFeature)); } return data; }
From source file:org.opentox.jaqpot3.qsar.util.SimpleMVHFilter.java
License:Open Source License
public Instances filter(Instances data) throws JaqpotException { ReplaceMissingValues replacer = new ReplaceMissingValues(); try {//www .j a va 2s.c om replacer.setInputFormat(data); replacer.setIgnoreClass(ignoreClass); Instances filtered_data = ReplaceMissingValues.useFilter(data, replacer); return filtered_data; } catch (Exception ex) { throw new JaqpotException("Cannot apply missing values filtering", ex); } }
From source file:org.opentox.qsar.processors.filters.SimpleMVHFilter.java
License:Open Source License
/**
 * Runs weka's {@link ReplaceMissingValues} filter over the given dataset.
 *
 * @param data the dataset whose missing values should be replaced
 * @return the filtered dataset
 * @throws QSARException with cause code {@code XQF412} if the filter fails
 */
@Override
public Instances filter(Instances data) throws QSARException {
    final ReplaceMissingValues mvhFilter = new ReplaceMissingValues();
    final Instances filtered;
    try {
        mvhFilter.setInputFormat(data);
        filtered = ReplaceMissingValues.useFilter(data, mvhFilter);
    } catch (Exception failure) {
        throw new QSARException(Cause.XQF412, "Cannot apply missing values filtering", failure);
    }
    return filtered;
}
From source file:org.tigr.microarray.mev.cluster.gui.impl.bn.PrepareArrayDataModule.java
License:Open Source License
/**
 * Takes a WEKA Instances object corresponding to the gene expression data and
 * returns a new WEKA Instances object with missing values replaced, if any.
 *
 * @param data an <code>Instances</code> which is a WEKA Instances object corresponding to the gene expression data
 * @return an <code>Instances</code> corresponding to a new WEKA Instances object with missing values replaced,
 *         or <code>null</code> if the filter could not be applied (the error is printed)
 * @exception NullArgumentException if the given data is null
 */
public static Instances replaceMissingValues(Instances data) throws NullArgumentException {
    if (data == null) {
        // the old message named the "discretize" method, which pointed to the wrong place
        throw new NullArgumentException("Instances passed to replaceMissingValues method is null!");
    }
    try {
        ReplaceMissingValues replaceMissingValues = new ReplaceMissingValues();
        replaceMissingValues.setInputFormat(data);
        return Filter.useFilter(data, replaceMissingValues);
    } catch (Exception e) {
        // best effort: report the problem and signal failure with a null result
        System.out.println(e);
        e.printStackTrace();
    }
    return null;
}