List of usage examples for weka.core.Instances#checkForAttributeType
public boolean checkForAttributeType(int attType)
From source file: org.opentox.qsar.processors.trainers.classification.WekaClassifier.java
License:Open Source License
@Override public Instances preprocessData(Instances data) throws QSARException { /*//from www. j a v a2s . co m * TODO: In case a client choses a non-nominal feature for the classifier, * provide a list of some available nominal features. */ if (data == null) { throw new NullPointerException("Cannot train a classification model without data"); } /* The incoming dataset always has the first attribute set to 'compound_uri' which is of type "String". This is removed at the begining of the training procedure */ AttributeCleanup filter = new AttributeCleanup(ATTRIBUTE_TYPE.string); // NOTE: Removal of string attributes should be always performed prior to any kind of training! data = filter.filter(data); SimpleMVHFilter fil = new SimpleMVHFilter(); data = fil.filter(data); // CHECK IF THE GIVEN URI IS AN ATTRIBUTE OF THE DATASET Attribute classAttribute = data.attribute(predictionFeature); if (classAttribute == null) { throw new QSARException(Cause.XQReg202, "The prediction feature you provided is not a valid numeric attribute of the dataset :{" + predictionFeature + "}"); } // CHECK IF THE DATASET CONTAINS ANY NOMINAL ATTRIBUTES if (!data.checkForAttributeType(Attribute.NOMINAL)) { throw new QSARException(Cause.XQC4040, "Improper dataset! The dataset you provided has no " + "nominal features therefore classification models cannot be built."); } // CHECK WHETHER THE CLASS ATTRIBUTE IS NOMINAL if (!classAttribute.isNominal()) { StringBuilder list_of_nominal_features = new StringBuilder(); int j = 0; for (int i = 0; i < data.numAttributes() && j < 10; i++) { if (data.attribute(i).isNominal()) { j++; list_of_nominal_features.append(data.attribute(i).name() + "\n"); } } throw new QSARException(Cause.XQC4041, "The prediction feature you provided " + "is not a nominal. 
Here is a list of some nominal features in the dataset you might " + "be interested in :\n" + list_of_nominal_features.toString()); } // CHECK IF THE RANGE OF THE CLASS ATTRIBUTE IS NON-UNARY Enumeration nominalValues = classAttribute.enumerateValues(); String v = nominalValues.nextElement().toString(); if (!nominalValues.hasMoreElements()) { throw new QSARException(Cause.XQC4042, "This classifier cannot handle unary nominal classes, that is " + "nominal class attributes whose range includes only one value. Singleton value : {" + v + "}"); } // SET THE CLASS ATTRIBUTE OF THE DATASET data.setClass(classAttribute); return data; }