Example usage for weka.core Instances classIndex

List of usage examples for weka.core Instances classIndex

Introduction

In this page you can find the example usage for weka.core Instances classIndex.

Prototype


public int classIndex()

Source Link

Document

Returns the class attribute's index.

Usage

From source file:org.opentox.jaqpot3.qsar.trainer.SvmRegression.java

License:Open Source License

/**
 * Trains a Weka {@code SVMreg} regression model on the given dataset.
 *
 * <p>The attribute named after {@code targetUri} is made the class attribute and the
 * attributes are reordered so that it comes last. The regressor is configured from the
 * {@code kernel}, {@code gamma}, {@code degree}, {@code cacheSize}, {@code epsilon} and
 * {@code tolerance} fields, built and evaluated on the reordered training set, and the
 * result is wrapped in a {@code Model} together with its features and parameters.
 *
 * @param data the training data; must contain a numeric attribute named
 *             {@code targetUri.toString()}
 * @return the model describing the trained regressor
 * @throws JaqpotException if the prediction feature is missing or not numeric, the
 *                         dataset cannot be reordered, the kernel is not one of
 *                         {@code rbf}, {@code polynomial} or {@code linear}, the task
 *                         cannot be updated, or training fails
 */
@Override
public Model train(Instances data) throws JaqpotException {
    try {
        Attribute target = data.attribute(targetUri.toString());
        if (target == null) {
            throw new QSARException("The prediction feature you provided was not found in the dataset");
        }
        if (!target.isNumeric()) {
            throw new QSARException("The prediction feature you provided is not numeric.");
        }
        data.setClass(target);

        /* Very important: place the target feature at the end! (target = last)*/
        int numAttributes = data.numAttributes();
        int classIndex = data.classIndex();
        List<String> properOrder = new ArrayList<String>(numAttributes);
        for (int j = 0; j < numAttributes; j++) {
            if (j != classIndex) {
                properOrder.add(data.attribute(j).name());
            }
        }
        properOrder.add(data.attribute(classIndex).name());

        // A reordering failure must abort training: continuing would only
        // dereference a null training set a few lines further down.
        Instances orderedTrainingSet;
        try {
            orderedTrainingSet = InstancesUtil.sortByFeatureAttrList(properOrder, data, -1);
        } catch (JaqpotException ex) {
            logger.error(null, ex);
            throw ex;
        }
        if (orderedTrainingSet == null) {
            throw new QSARException("The training dataset could not be reordered.");
        }
        orderedTrainingSet.setClass(orderedTrainingSet.attribute(targetUri.toString()));

        getTask().getMeta()
                .addComment("Dataset successfully retrieved and converted into a weka.core.Instances object");
        UpdateTask firstTaskUpdater = new UpdateTask(getTask());
        firstTaskUpdater.setUpdateMeta(true);
        firstTaskUpdater.setUpdateTaskStatus(true);//TODO: Is this necessary?
        try {
            firstTaskUpdater.update();
        } catch (DbException ex) {
            throw new JaqpotException(ex);
        } finally {
            try {
                firstTaskUpdater.close();
            } catch (DbException ex) {
                throw new JaqpotException(ex);
            }
        }

        Model m = new Model(Configuration.getBaseUri().augment("model", getUuid().toString()));

        // INITIALIZE THE REGRESSOR
        // NOTE(review): 'cost' is recorded as a model parameter below but is never
        // passed to the regressor here - confirm whether that is intended.
        SVMreg regressor = new SVMreg();
        final String[] regressorOptions = { "-P", Double.toString(epsilon), "-T", Double.toString(tolerance) };

        // Build the kernel; an unknown name is rejected instead of silently
        // handing a null kernel to the regressor.
        Kernel svmKernel;
        if (kernel.equalsIgnoreCase("rbf")) {
            RBFKernel rbfKernel = new RBFKernel();
            rbfKernel.setGamma(gamma);
            rbfKernel.setCacheSize(cacheSize);
            svmKernel = rbfKernel;
        } else if (kernel.equalsIgnoreCase("polynomial")) {
            PolyKernel polyKernel = new PolyKernel();
            polyKernel.setExponent(degree);
            polyKernel.setCacheSize(cacheSize);
            polyKernel.setUseLowerOrder(true);
            svmKernel = polyKernel;
        } else if (kernel.equalsIgnoreCase("linear")) {
            // A linear kernel is a polynomial kernel of exponent 1.
            PolyKernel linearKernel = new PolyKernel();
            linearKernel.setExponent(1.0);
            linearKernel.setCacheSize(cacheSize);
            linearKernel.setUseLowerOrder(true);
            svmKernel = linearKernel;
        } else {
            throw new QSARException("Unsupported kernel {" + kernel + "}. "
                    + "Supported kernels are rbf, polynomial and linear.");
        }

        try {
            regressor.setOptions(regressorOptions);
        } catch (final Exception ex) {
            throw new QSARException("Bad options in SVM trainer for epsilon = {" + epsilon + "} or "
                    + "tolerance = {" + tolerance + "}.", ex);
        }
        // The kernel is set after the options so that option parsing cannot replace it.
        regressor.setKernel(svmKernel);

        // START TRAINING & CREATE MODEL
        try {
            regressor.buildClassifier(orderedTrainingSet);

            // evaluate classifier on the training set and keep the summary statistics
            Evaluation eval = new Evaluation(orderedTrainingSet);
            eval.evaluateModel(regressor, orderedTrainingSet);
            String stats = eval.toSummaryString("", false);

            ActualModel am = new ActualModel(regressor);
            am.setStatistics(stats);
            m.setActualModel(am);
        } catch (NotSerializableException ex) {
            String message = "Model is not serializable";
            logger.error(message, ex);
            throw new JaqpotException(message, ex);
        } catch (final Exception ex) {
            throw new QSARException("Unexpected condition while trying to train "
                    + "the model. Possible explanation : {" + ex.getMessage() + "}", ex);
        }

        m.setAlgorithm(getAlgorithm());
        m.setCreatedBy(getTask().getCreatedBy());
        m.setDataset(datasetUri);
        try {
            dependentFeature.loadFromRemote();
        } catch (ServiceInvocationException ex) {
            // Best effort: training continues with whatever is known locally.
            logger.error("Could not load the dependent feature from the remote service", ex);
        }
        // Registered exactly once (the original added the same feature twice).
        m.addDependentFeatures(dependentFeature);

        m.setIndependentFeatures(independentFeatures);

        Feature predictedFeature = publishFeature(m, dependentFeature.getUnits(),
                "Feature created as prediction feature for SVM model " + m.getUri(), datasetUri,
                featureService);
        m.addPredictedFeatures(predictedFeature);
        String predictionFeatureUri = predictedFeature.getUri().toString();

        getTask().getMeta().addComment("Prediction feature " + predictionFeatureUri + " was created.");

        /* SET PARAMETERS FOR THE TRAINED MODEL */
        m.setParameters(new HashSet<Parameter>());
        Parameter<String> kernelParam = new Parameter("kernel", new LiteralValue<String>(kernel))
                .setScope(Parameter.ParameterScope.OPTIONAL);
        kernelParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
        Parameter<Double> costParam = new Parameter("cost", new LiteralValue<Double>(cost))
                .setScope(Parameter.ParameterScope.OPTIONAL);
        costParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
        Parameter<Double> gammaParam = new Parameter("gamma", new LiteralValue<Double>(gamma))
                .setScope(Parameter.ParameterScope.OPTIONAL);
        gammaParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
        // NOTE(review): "espilon" looks like a typo for "epsilon"; it is left untouched
        // because already stored models or clients may look the parameter up by this name.
        Parameter<Double> epsilonParam = new Parameter("espilon", new LiteralValue<Double>(epsilon))
                .setScope(Parameter.ParameterScope.OPTIONAL);
        epsilonParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
        Parameter<Integer> degreeParam = new Parameter("degree", new LiteralValue<Integer>(degree))
                .setScope(Parameter.ParameterScope.OPTIONAL);
        degreeParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
        Parameter<Double> toleranceParam = new Parameter("tolerance", new LiteralValue<Double>(tolerance))
                .setScope(Parameter.ParameterScope.OPTIONAL);
        toleranceParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));

        m.getParameters().add(kernelParam);
        m.getParameters().add(costParam);
        m.getParameters().add(gammaParam);
        m.getParameters().add(epsilonParam);
        m.getParameters().add(degreeParam);
        m.getParameters().add(toleranceParam);

        //save the instances being predicted to abstract trainer for calculating DoA
        predictedInstances = orderedTrainingSet;
        excludeAttributesDoA.add(dependentFeature.getUri().toString());

        return m;
    } catch (QSARException ex) {
        logger.debug(null, ex);
        throw new JaqpotException(ex);
    }
}

From source file:org.opentox.qsar.processors.trainers.classification.NaiveBayesTrainer.java

License:Open Source License

/**
 * Trains a Weka {@code NaiveBayes} classifier on the given data.
 *
 * <p>The data are first written to a temporary ARFF file; training is then delegated to
 * {@code Evaluation.evaluateModel}, which is also asked (option {@code -d}) to store the
 * model under {@code ServerFolders.models_weka}. The temporary file is removed on every
 * exit path.
 *
 * @param data the training data, with the class attribute already set
 * @return a {@code QSARModel} describing the trained model and its features
 * @throws QSARException    if training fails
 * @throws RuntimeException if the data cannot be written to the temporary file
 */
public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    try {
        // SAVE THE DATA IN THE TEMPORARY FILE
        FileOutputStream arffStream = null;
        try {
            ArffSaver dataSaver = new ArffSaver();
            dataSaver.setInstances(data);
            arffStream = new FileOutputStream(tempFile);
            dataSaver.setDestination(arffStream);
            dataSaver.writeBatch();
            if (!tempFile.exists()) {
                throw new IOException("Temporary File was not created");
            }
        } catch (final IOException ex) {
            // The content of the dataset could not be written to the destination file.
            throw new RuntimeException(
                    "Unexpected condition while trying to save the dataset in a temporary ARFF file", ex);
        } finally {
            // Make sure the file handle is released even when saving failed half-way.
            if (arffStream != null) {
                try {
                    arffStream.close();
                } catch (final IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }

        NaiveBayes classifier = new NaiveBayes();

        // Weka expects a 1-based class index on the command line, hence the "+ 1".
        String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
                /// Save the model in the following directory
                "-d", ServerFolders.models_weka + "/" + uuid };

        try {
            Evaluation.evaluateModel(classifier, generalOptions);
        } catch (final Exception ex) {
            throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                    + "a Naive Bayes model. Possible explanation : {" + ex.getMessage() + "}", ex);
        }

        QSARModel model = new QSARModel();

        model.setParams(getParameters());
        model.setCode(uuid.toString());
        model.setAlgorithm(YaqpAlgorithms.NAIVE_BAYES);
        model.setDataset(datasetUri);
        model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

        // Every attribute except the class attribute is an independent feature.
        final int classIndex = data.classIndex();
        ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
        for (int i = 0; i < data.numAttributes(); i++) {
            if (i != classIndex) {
                independentFeatures.add(new Feature(data.attribute(i).name()));
            }
        }

        Feature dependentFeature = new Feature(data.classAttribute().name());
        Feature predictedFeature = dependentFeature;
        model.setDependentFeature(dependentFeature);
        model.setIndependentFeatures(independentFeatures);
        model.setPredictionFeature(predictedFeature);
        return model;
    } finally {
        // The ARFF dump is only needed while training.
        tempFile.delete();
    }
}

From source file:org.opentox.qsar.processors.trainers.classification.SVCTrainer.java

License:Open Source License

/**
 * Trains a support vector classifier (Weka {@code SMO}) on the given data.
 *
 * <p>The data are written to a temporary ARFF file, a kernel is built from the
 * {@code kernel}, {@code gamma}, {@code degree} and {@code cacheSize} fields, and
 * training is delegated to {@code Evaluation.evaluateModel}, which is also asked
 * (option {@code -d}) to store the model under {@code ServerFolders.models_weka}.
 * The temporary file is removed on every exit path.
 *
 * @param data the training data, with the class attribute already set
 * @return a {@code QSARModel} describing the trained model and its features
 * @throws QSARException    if the kernel is not one of {@code rbf}, {@code polynomial}
 *                          or {@code linear}, or if training fails
 * @throws RuntimeException if the data cannot be written to the temporary file
 */
public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    try {
        // SAVE THE DATA IN THE TEMPORARY FILE
        FileOutputStream arffStream = null;
        try {
            ArffSaver dataSaver = new ArffSaver();
            dataSaver.setInstances(data);
            arffStream = new FileOutputStream(tempFile);
            dataSaver.setDestination(arffStream);
            dataSaver.writeBatch();
            if (!tempFile.exists()) {
                throw new IOException("Temporary File was not created");
            }
        } catch (final IOException ex) {
            // The content of the dataset could not be written to the destination file.
            throw new RuntimeException(
                    "Unexpected condition while trying to save the dataset in a temporary ARFF file", ex);
        } finally {
            // Make sure the file handle is released even when saving failed half-way.
            if (arffStream != null) {
                try {
                    arffStream.close();
                } catch (final IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }

        // INITIALIZE THE CLASSIFIER
        SMO classifier = new SMO();
        classifier.setEpsilon(0.1);
        classifier.setToleranceParameter(tolerance);

        // CONSTRUCT A KERNEL ACCORDING TO THE POSTED PARAMETERS
        // SUPPORTED KERNELS ARE {rbf, linear, polynomial}
        final Kernel svcKernel;
        if (this.kernel.equalsIgnoreCase("rbf")) {
            RBFKernel rbfKernel = new RBFKernel();
            rbfKernel.setGamma(gamma);
            rbfKernel.setCacheSize(cacheSize);
            svcKernel = rbfKernel;
        } else if (this.kernel.equalsIgnoreCase("polynomial")) {
            PolyKernel polyKernel = new PolyKernel();
            polyKernel.setExponent(degree);
            polyKernel.setCacheSize(cacheSize);
            polyKernel.setUseLowerOrder(true);
            svcKernel = polyKernel;
        } else if (this.kernel.equalsIgnoreCase("linear")) {
            // A linear kernel is a polynomial kernel of exponent 1.
            PolyKernel linearKernel = new PolyKernel();
            linearKernel.setExponent(1.0);
            linearKernel.setCacheSize(cacheSize);
            linearKernel.setUseLowerOrder(true);
            svcKernel = linearKernel;
        } else {
            // Fail explicitly instead of configuring the classifier with a null kernel.
            final String message = "Unsupported kernel {" + this.kernel + "}. "
                    + "Supported kernels are rbf, polynomial and linear.";
            throw new QSARException(Cause.XQReg350, message, new IllegalArgumentException(message));
        }
        classifier.setKernel(svcKernel);

        String modelFilePath = ServerFolders.models_weka + "/" + uuid.toString();
        // Weka expects a 1-based class index on the command line, hence the "+ 1".
        String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
                /// Save the model in the following directory
                "-d", modelFilePath };

        // AFTER ALL, BUILD THE CLASSIFICATION MODEL AND SAVE IT AS A SERIALIZED
        // WEKA FILE IN THE CORRESPONDING DIRECTORY.
        // NOTE(review): Evaluation.evaluateModel(Classifier, String[]) may hand the
        // remaining command-line options to the classifier's setOptions; verify that the
        // kernel and tolerance configured above survive with the Weka version in use.
        try {
            Evaluation.evaluateModel(classifier, generalOptions);
        } catch (final Exception ex) {
            throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                    + "a support vector classification model. Possible explanation : {" + ex.getMessage() + "}",
                    ex);
        }

        // Every attribute except the class attribute is an independent feature.
        final int classIndex = data.classIndex();
        ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
        for (int i = 0; i < data.numAttributes(); i++) {
            if (i != classIndex) {
                independentFeatures.add(new Feature(data.attribute(i).name()));
            }
        }

        Feature dependentFeature = new Feature(data.classAttribute().name());
        Feature predictedFeature = dependentFeature;

        QSARModel model = new QSARModel();
        model.setCode(uuid.toString());
        model.setAlgorithm(YaqpAlgorithms.SVC);
        model.setPredictionFeature(predictedFeature);
        model.setDependentFeature(dependentFeature);
        model.setIndependentFeatures(independentFeatures);
        model.setDataset(datasetUri);
        model.setParams(getParameters());
        model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

        return model;
    } finally {
        // The ARFF dump is only needed while training.
        tempFile.delete();
    }
}

From source file:org.opentox.qsar.processors.trainers.regression.MLRTrainer.java

License:Open Source License

/**
 * Trains the MLR model given an Instances object with the training data. The prediction
 * feature (class attribute) is specified in the constructor of the class.
 *
 * <p>The data are written to a temporary ARFF file, a {@code LinearRegression} model is
 * built and exported as PMML, and {@code Evaluation.evaluateModel} is then run on the
 * temporary file, being asked (option {@code -d}) to store the model under
 * {@code ServerFolders.models_weka}. The temporary file is removed on every exit path.
 *
 * @param data The training data as <code>weka.core.Instances</code> object.
 * @return The QSARModel corresponding to the trained model.
 * @throws QSARException In case the model cannot be trained
 * <table>
 * <thead>
 * <tr>
 * <td><b>Code</b></td><td><b>Explanation</b></td>
 * </tr>
 * </thead>
 * <tbody>
 * <tr>
 * <td>XQReg1</td><td>Could not train the model</td>
 * </tr>
 * <tr>
 * <td>XQReg2</td><td>Could not generate PMML representation for the model</td>
 * </tr>
 * <tr>
 * <td>XQReg202</td><td>The prediction feature you provided is not a valid numeric attribute
 * of the dataset (not raised by the code in this method body)</td>
 * </tr>
 * <tr>
 * <td>XQReg350</td><td>Unexpected failure while evaluating and storing the model</td>
 * </tr>
 * </tbody>
 * </table>
 * @throws NullPointerException
 *      In case the provided training data is null.
 * @throws RuntimeException
 *      In case the data cannot be written to the temporary file.
 */
public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    try {
        // SAVE THE DATA IN THE TEMPORARY FILE
        FileOutputStream arffStream = null;
        try {
            ArffSaver dataSaver = new ArffSaver();
            dataSaver.setInstances(data);
            arffStream = new FileOutputStream(tempFile);
            dataSaver.setDestination(arffStream);
            dataSaver.writeBatch();
        } catch (final IOException ex) {
            throw new RuntimeException(
                    "Unexpected condition while trying to save the dataset in a temporary ARFF file", ex);
        } finally {
            // Make sure the file handle is released even when saving failed half-way.
            if (arffStream != null) {
                try {
                    arffStream.close();
                } catch (final IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }

        // Build the regression in memory first: its coefficients feed the PMML export.
        LinearRegression linreg = new LinearRegression();
        String[] linRegOptions = { "-S", "1", "-C" };
        try {
            linreg.setOptions(linRegOptions);
            linreg.buildClassifier(data);
        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            YaqpLogger.LOG.log(new Trace(getClass(), message + " :: " + ex));
            throw new QSARException(Cause.XQReg1, message, ex);
        }

        try {
            generatePMML(linreg, data);
        } catch (final YaqpIOException ex) {
            String message = "Could not generate PMML representation for MLR model :: " + ex;
            throw new QSARException(Cause.XQReg2, message, ex);
        }

        // PERFORM THE TRAINING
        // Weka expects a 1-based class index on the command line, hence the "+ 1".
        String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
                /// Save the model in the following directory
                "-d", ServerFolders.models_weka + "/" + uuid };
        try {
            Evaluation.evaluateModel(linreg, generalOptions);
        } catch (final Exception ex) {
            throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                    + "an MLR model. Possible explanation : {" + ex.getMessage() + "}", ex);
        }

        // Every attribute except the class attribute is an independent feature.
        final int classIndex = data.classIndex();
        ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
        for (int i = 0; i < data.numAttributes(); i++) {
            if (i != classIndex) {
                independentFeatures.add(new Feature(data.attribute(i).name()));
            }
        }

        Feature dependentFeature = new Feature(data.classAttribute().name());
        Feature predictedFeature = dependentFeature;

        QSARModel model = new QSARModel(uuid.toString(), predictedFeature, dependentFeature, independentFeatures,
                YaqpAlgorithms.MLR, new User(), null, datasetUri, ModelStatus.UNDER_DEVELOPMENT);
        model.setParams(new HashMap<String, AlgorithmParameter>());

        return model;
    } finally {
        // The ARFF dump is only needed while training; the original never removed it
        // on the success path.
        tempFile.delete();
    }

}

From source file:org.opentox.qsar.processors.trainers.regression.MLRTrainer.java

License:Open Source License

/**
 * Generates the PMML representation of the model and stores in the hard
 * disk./*  w  w  w  .  ja  va2  s  . c om*/
 * @param coefficients The vector of the coefficients of the MLR model.
 * @param model_id The id of the generated model.
 * TODO: build the XML using some XML editor
 */
// <editor-fold defaultstate="collapsed" desc="PMML generation routine!">
private void generatePMML(final LinearRegression wekaModel, final Instances data) throws YaqpIOException {
    final double[] coefficients = wekaModel.coefficients();
    StringBuilder pmml = new StringBuilder();
    pmml.append("<?xml version=\"1.0\" ?>");
    pmml.append(PMMLIntro);
    pmml.append("<Model ID=\"" + uuid.toString() + "\" Name=\"MLR Model\">\n");
    pmml.append("<AlgorithmID href=\"" + Configuration.BASE_URI + "/algorithm/mlr\"/>\n");
    pmml.append("<DatasetID href=\"" + datasetUri + "\"/>\n");
    pmml.append("<AlgorithmParameters />\n");
    pmml.append("<FeatureDefinitions>\n");
    for (int k = 1; k <= data.numAttributes(); k++) {
        pmml.append("<link href=\"" + data.attribute(k - 1).name() + "\"/>\n");
    }
    pmml.append("<target index=\"" + data.attribute(predictionFeature).index() + "\" name=\""
            + predictionFeature + "\"/>\n");
    pmml.append("</FeatureDefinitions>\n");
    pmml.append("<Timestamp>" + java.util.GregorianCalendar.getInstance().getTime() + "</Timestamp>\n");
    pmml.append("</Model>\n");

    pmml.append("<DataDictionary numberOfFields=\"" + data.numAttributes() + "\" >\n");
    for (int k = 0; k <= data.numAttributes() - 1; k++) {
        pmml.append("<DataField name=\"" + data.attribute(k).name()
                + "\" optype=\"continuous\" dataType=\"double\" />\n");
    }
    pmml.append("</DataDictionary>\n");
    // RegressionModel
    pmml.append("<RegressionModel modelName=\"" + uuid.toString() + "\"" + " functionName=\"regression\""
            + " modelType=\"linearRegression\"" + " algorithmName=\"linearRegression\"" + " targetFieldName=\""
            + data.classAttribute().name() + "\"" + ">\n");
    // RegressionModel::MiningSchema
    pmml.append("<MiningSchema>\n");
    for (int k = 0; k <= data.numAttributes() - 1; k++) {
        if (k != data.classIndex()) {
            pmml.append("<MiningField name=\"" + data.attribute(k).name() + "\" />\n");
        }
    }
    pmml.append("<MiningField name=\"" + data.attribute(data.classIndex()).name() + "\" "
            + "usageType=\"predicted\"/>\n");
    pmml.append("</MiningSchema>\n");
    // RegressionModel::RegressionTable
    pmml.append("<RegressionTable intercept=\"" + coefficients[coefficients.length - 1] + "\">\n");

    for (int k = 0; k <= data.numAttributes() - 1; k++) {

        if (!(predictionFeature.equals(data.attribute(k).name()))) {
            pmml.append("<NumericPredictor name=\"" + data.attribute(k).name() + "\" " + " exponent=\"1\" "
                    + "coefficient=\"" + coefficients[k] + "\"/>\n");
        }
    }
    pmml.append("</RegressionTable>\n");
    pmml.append("</RegressionModel>\n");
    pmml.append("</PMML>\n\n");
    try {
        FileWriter fwriter = new FileWriter(ServerFolders.models_pmml + "/" + uuid.toString());
        BufferedWriter writer = new BufferedWriter(fwriter);
        writer.write(pmml.toString());
        writer.flush();
        writer.close();
    } catch (IOException ex) {
        throw new YaqpIOException(Cause.XQReg3, "Could not write data to PMML file :" + uuid.toString(), ex);
    }
}

From source file:org.opentox.qsar.processors.trainers.regression.SVMTrainer.java

License:Open Source License

/**
 *
 * @param data/*from  ww w .jav a2s . co m*/
 * @return
 * @throws QSARException
 */
public QSARModel train(Instances data) throws QSARException {

    // NOTE: The checks (check if data is null and if the prediction feature is
    //       acceptable are found in WekaRegressor. The method preprocessData(Instances)
    //       does this job.        

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    // SAVE THE DATA IN THE TEMPORARY FILE
    try {
        ArffSaver dataSaver = new ArffSaver();
        dataSaver.setInstances(data);
        dataSaver.setDestination(new FileOutputStream(tempFile));
        dataSaver.writeBatch();
    } catch (final IOException ex) {
        tempFile.delete();
        throw new RuntimeException(
                "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex);
    }

    // INITIALIZE THE REGRESSOR
    SVMreg regressor = new SVMreg();
    final String[] regressorOptions = { "-P", Double.toString(epsilon), "-T", Double.toString(tolerance) };

    Kernel svm_kernel = null;
    if (kernel.equalsIgnoreCase("rbf")) {
        RBFKernel rbf_kernel = new RBFKernel();
        rbf_kernel.setGamma(Double.parseDouble(Double.toString(gamma)));
        rbf_kernel.setCacheSize(Integer.parseInt(Integer.toString(cacheSize)));
        svm_kernel = rbf_kernel;
    } else if (kernel.equalsIgnoreCase("polynomial")) {
        PolyKernel poly_kernel = new PolyKernel();
        poly_kernel.setExponent(Double.parseDouble(Integer.toString(degree)));
        poly_kernel.setCacheSize(Integer.parseInt(Integer.toString(cacheSize)));
        poly_kernel.setUseLowerOrder(true);
        svm_kernel = poly_kernel;
    } else if (kernel.equalsIgnoreCase("linear")) {
        PolyKernel poly_kernel = new PolyKernel();
        poly_kernel.setExponent((double) 1.0);
        poly_kernel.setCacheSize(Integer.parseInt(Integer.toString(cacheSize)));
        poly_kernel.setUseLowerOrder(true);
        svm_kernel = poly_kernel;
    }
    regressor.setKernel(svm_kernel);
    try {
        regressor.setOptions(regressorOptions);
    } catch (final Exception ex) {
        tempFile.delete();
        throw new IllegalArgumentException("Bad options in SVM trainer for epsilon = {" + epsilon + "} or "
                + "tolerance = {" + tolerance + "}.", ex);
    }

    // PERFORM THE TRAINING
    String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
            /// Save the model in the following directory
            "-d", ServerFolders.models_weka + "/" + uuid };
    try {
        Evaluation.evaluateModel(regressor, generalOptions);
    } catch (final Exception ex) {
        tempFile.delete();
        throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                + "an SVM model. Possible explanation : {" + ex.getMessage() + "}", ex);
    }

    QSARModel model = new QSARModel();

    model.setParams(getParameters());
    model.setCode(uuid.toString());
    model.setAlgorithm(YaqpAlgorithms.SVM);
    model.setDataset(datasetUri);
    model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

    ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
    for (int i = 0; i < data.numAttributes(); i++) {
        Feature f = new Feature(data.attribute(i).name());
        if (data.classIndex() != i) {
            independentFeatures.add(f);
        }
    }

    Feature dependentFeature = new Feature(data.classAttribute().name());
    Feature predictedFeature = dependentFeature;
    model.setDependentFeature(dependentFeature);
    model.setIndependentFeatures(independentFeatures);
    model.setPredictionFeature(predictedFeature);
    tempFile.delete();
    return model;
}

From source file:org.packDataMining.SMOTE.java

License:Open Source License

/**
 * The procedure implementing the SMOTE algorithm. The output
 * instances are pushed onto the output queue for collection.
 * <p>
 * Every input instance is pushed unchanged. In addition, for each instance
 * of the selected class, its nearest neighbors within that class are found
 * and synthetic instances are generated and pushed: numeric attributes are
 * interpolated between the instance and a randomly chosen neighbor, all
 * other attributes take the most frequent value among the instance and its
 * neighbors.
 *
 * @throws Exception    if provided options cannot be executed
 *          on input instances
 */
protected void doSMOTE() throws Exception {
    // minIndex: 0-based index of the class value to oversample
    // min: number of instances of that class (only known when auto-detected)
    int minIndex = 0;
    int min = Integer.MAX_VALUE;
    if (m_DetectMinorityClass) {
        // find minority class: the non-empty class value with the fewest instances
        int[] classCounts = getInputFormat().attributeStats(getInputFormat().classIndex()).nominalCounts;
        for (int i = 0; i < classCounts.length; i++) {
            if (classCounts[i] != 0 && classCounts[i] < min) {
                min = classCounts[i];
                minIndex = i;
            }
        }
    } else {
        // the class value option is 1-based: "first", "last" or a number
        String classVal = getClassValue();
        if (classVal.equalsIgnoreCase("first")) {
            minIndex = 1;
        } else if (classVal.equalsIgnoreCase("last")) {
            minIndex = getInputFormat().numClasses();
        } else {
            minIndex = Integer.parseInt(classVal);
        }
        if (minIndex > getInputFormat().numClasses()) {
            throw new Exception("value index must be <= the number of classes");
        }
        minIndex--; // make it an index
        // NOTE(review): min stays Integer.MAX_VALUE on this path, so the cap
        // below never applies; the chosen class is assumed to contain more
        // than getNearestNeighbors() instances - confirm against callers.
    }

    // never ask for more neighbors than the class can provide
    int nearestNeighbors;
    if (min <= getNearestNeighbors()) {
        nearestNeighbors = min - 1;
    } else {
        nearestNeighbors = getNearestNeighbors();
    }
    if (nearestNeighbors < 1)
        throw new Exception("Cannot use 0 neighbors!");

    // compose minority class dataset
    // also push all dataset instances
    Instances sample = getInputFormat().stringFreeStructure();
    Enumeration instanceEnum = getInputFormat().enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
        Instance instance = (Instance) instanceEnum.nextElement();
        push((Instance) instance.copy());
        if ((int) instance.classValue() == minIndex) {
            sample.add(instance);
        }
    }

    // compute Value Distance Metric matrices for nominal features:
    // vdm[a][b] = sum over classes of |P(class | a) - P(class | b)|
    Map<Attribute, double[][]> vdmMap = new HashMap<Attribute, double[][]>();
    Enumeration attrEnum = getInputFormat().enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
        Attribute attr = (Attribute) attrEnum.nextElement();
        if (!attr.equals(getInputFormat().classAttribute())) {
            if (attr.isNominal() || attr.isString()) {
                double[][] vdm = new double[attr.numValues()][attr.numValues()];
                vdmMap.put(attr, vdm);
                // how often each attribute value occurs, overall and per class value
                int[] featureValueCounts = new int[attr.numValues()];
                int[][] featureValueCountsByClass = new int[getInputFormat().classAttribute().numValues()][attr
                        .numValues()];
                instanceEnum = getInputFormat().enumerateInstances();
                while (instanceEnum.hasMoreElements()) {
                    Instance instance = (Instance) instanceEnum.nextElement();
                    int value = (int) instance.value(attr);
                    int classValue = (int) instance.classValue();
                    featureValueCounts[value]++;
                    featureValueCountsByClass[classValue][value]++;
                }
                for (int valueIndex1 = 0; valueIndex1 < attr.numValues(); valueIndex1++) {
                    for (int valueIndex2 = 0; valueIndex2 < attr.numValues(); valueIndex2++) {
                        double sum = 0;
                        for (int classValueIndex = 0; classValueIndex < getInputFormat()
                                .numClasses(); classValueIndex++) {
                            double c1i = featureValueCountsByClass[classValueIndex][valueIndex1];
                            double c2i = featureValueCountsByClass[classValueIndex][valueIndex2];
                            double c1 = featureValueCounts[valueIndex1];
                            double c2 = featureValueCounts[valueIndex2];
                            double term1 = c1i / c1;
                            double term2 = c2i / c2;
                            sum += Math.abs(term1 - term2);
                        }
                        vdm[valueIndex1][valueIndex2] = sum;
                    }
                }
            }
        }
    }

    // use this random source for all required randomness
    Random rand = new Random(getRandomSeed());

    // find the set of extra indices to use if the percentage is not evenly divisible by 100
    // (floating-point division on both sides so the fractional part is never truncated away)
    List<Integer> extraIndices = new LinkedList<Integer>();
    double percentageRemainder = (getPercentage() / 100.0) - Math.floor(getPercentage() / 100.0);
    int extraIndicesCount = (int) (percentageRemainder * sample.numInstances());
    if (extraIndicesCount >= 1) {
        for (int i = 0; i < sample.numInstances(); i++) {
            extraIndices.add(i);
        }
    }
    Collections.shuffle(extraIndices, rand);
    extraIndices = extraIndices.subList(0, extraIndicesCount);
    Set<Integer> extraIndexSet = new HashSet<Integer>(extraIndices);

    // the main loop to handle computing nearest neighbors and generating SMOTE
    // examples from each instance in the original minority class data
    Instance[] nnArray = new Instance[nearestNeighbors];
    for (int i = 0; i < sample.numInstances(); i++) {
        Instance instanceI = sample.instance(i);
        // find k nearest neighbors for each instance;
        // each entry is a { Double distance, Instance neighbor } pair
        List<Object[]> distanceToInstance = new LinkedList<Object[]>();
        for (int j = 0; j < sample.numInstances(); j++) {
            Instance instanceJ = sample.instance(j);
            if (i != j) {
                double distance = 0;
                attrEnum = getInputFormat().enumerateAttributes();
                while (attrEnum.hasMoreElements()) {
                    Attribute attr = (Attribute) attrEnum.nextElement();
                    if (!attr.equals(getInputFormat().classAttribute())) {
                        double iVal = instanceI.value(attr);
                        double jVal = instanceJ.value(attr);
                        if (attr.isNumeric()) {
                            distance += Math.pow(iVal - jVal, 2);
                        } else {
                            distance += vdmMap.get(attr)[(int) iVal][(int) jVal];
                        }
                    }
                }
                distance = Math.pow(distance, .5);
                distanceToInstance.add(new Object[] { distance, instanceJ });
            }
        }

        // sort the neighbors according to distance
        Collections.sort(distanceToInstance, new Comparator<Object[]>() {
            public int compare(Object[] o1, Object[] o2) {
                // Compare by sign only. Rounding the difference to an int
                // would report "equal" for any negative gap smaller than 1,
                // mis-ordering close neighbors and breaking the Comparator
                // contract.
                return Double.compare((Double) o1[0], (Double) o2[0]);
            }
        });

        // populate the actual nearest neighbor instance array
        Iterator<Object[]> entryIterator = distanceToInstance.iterator();
        int j = 0;
        while (entryIterator.hasNext() && j < nearestNeighbors) {
            nnArray[j] = (Instance) entryIterator.next()[1];
            j++;
        }

        // create synthetic examples: floor(percentage / 100) per instance, plus
        // one more if this instance was drawn into the extra index set
        int n = (int) Math.floor(getPercentage() / 100.0);
        while (n > 0 || extraIndexSet.remove(i)) {
            double[] values = new double[sample.numAttributes()];
            int nn = rand.nextInt(nearestNeighbors);
            attrEnum = getInputFormat().enumerateAttributes();
            while (attrEnum.hasMoreElements()) {
                Attribute attr = (Attribute) attrEnum.nextElement();
                if (!attr.equals(getInputFormat().classAttribute())) {
                    if (attr.isNumeric()) {
                        // random point on the segment between the instance and the neighbor
                        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
                        double gap = rand.nextDouble();
                        values[attr.index()] = instanceI.value(attr) + gap * dif;
                    } else if (attr.isDate()) {
                        // NOTE(review): presumably isNumeric() is also true for date
                        // attributes, which would make this branch unreachable - confirm
                        // against the Attribute API before relying on the truncation.
                        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
                        double gap = rand.nextDouble();
                        values[attr.index()] = (long) (instanceI.value(attr) + gap * dif);
                    } else {
                        // majority vote over the instance itself and all of its neighbors;
                        // ties go to the lowest value index
                        int[] valueCounts = new int[attr.numValues()];
                        int iVal = (int) instanceI.value(attr);
                        valueCounts[iVal]++;
                        for (int nnEx = 0; nnEx < nearestNeighbors; nnEx++) {
                            int val = (int) nnArray[nnEx].value(attr);
                            valueCounts[val]++;
                        }
                        int maxIndex = 0;
                        int max = Integer.MIN_VALUE;
                        for (int index = 0; index < attr.numValues(); index++) {
                            if (valueCounts[index] > max) {
                                max = valueCounts[index];
                                maxIndex = index;
                            }
                        }
                        values[attr.index()] = maxIndex;
                    }
                }
            }
            values[sample.classIndex()] = minIndex;
            Instance synthetic = new Instance(1.0, values);
            push(synthetic);
            n--;
        }
    }
}

From source file:org.pentaho.di.scoring.WekaScoringData.java

License:Open Source License

/**
 * Finds a mapping between the attributes that a Weka model has been trained
 * with and the incoming Kettle row format. Returns an array of indices, where
 * the element at index 0 of the array is the index of the Kettle field that
 * corresponds to the first attribute in the Instances structure, the element
 * at index 1 is the index of the Kettle fields that corresponds to the second
 * attribute, .../*from  ww  w .  j a  v  a 2  s.c  o  m*/
 *
 * @param header                 the Instances header
 * @param inputRowMeta           the meta data for the incoming rows
 * @param updateIncrementalModel true if the model is incremental and should
 *                               be updated on the incoming instances
 * @param log                    the log to use
 */
public void mapIncomingRowMetaData(Instances header, RowMetaInterface inputRowMeta,
        boolean updateIncrementalModel, LogChannelInterface log) {
    m_mappingIndexes = WekaScoringData.findMappings(header, inputRowMeta);
    m_updateIncrementalModel = updateIncrementalModel;

    // If updating of incremental models has been selected, then
    // check on the ability to do this
    if (m_updateIncrementalModel && m_model.isSupervisedLearningModel()) {
        if (m_model.isUpdateableModel()) {
            // Do we have the class mapped successfully to an incoming
            // Kettle field
            if (m_mappingIndexes[header.classIndex()] == WekaScoringData.NO_MATCH
                    || m_mappingIndexes[header.classIndex()] == WekaScoringData.TYPE_MISMATCH) {
                m_updateIncrementalModel = false;
                log.logError(
                        BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringMeta.Log.NoMatchForClass")); //$NON-NLS-1$
            }
        } else {
            m_updateIncrementalModel = false;
            log.logError(BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringMeta.Log.ModelNotUpdateable")); //$NON-NLS-1$
        }
    }
}

From source file:org.pentaho.di.scoring.WekaScoringDialog.java

License:Open Source License

/**
 * Enables or disables the "output probabilities" checkbox depending on
 * whether the given model is able to produce probabilities.
 * <p>
 * Clusterers are asked directly. For supervised models the header's class
 * attribute decides: a numeric class disables the checkbox, any other class
 * enables it. A supervised model without a class attribute leaves the
 * checkbox untouched.
 *
 * @param tempM the model to inspect
 */
private void checkAbilityToProduceProbabilities(WekaScoringModel tempM) {
    final boolean probabilitiesAvailable;

    if (tempM.isSupervisedLearningModel()) {
        Instances header = tempM.getHeader();
        if (header.classIndex() < 0) {
            // no class attribute set: nothing to decide
            return;
        }
        probabilitiesAvailable = !header.classAttribute().isNumeric();
    } else {
        probabilitiesAvailable = ((WekaScoringClusterer) tempM).canProduceProbabilities();
    }

    if (probabilitiesAvailable) {
        m_wOutputProbs.setEnabled(true);
    } else {
        // clear the selection before greying the checkbox out
        m_wOutputProbs.setSelection(false);
        m_wOutputProbs.setEnabled(false);
    }
}

From source file:org.scify.NewSumServer.Server.MachineLearning.labelTagging.java

License:Apache License

/**
 * Finds the recommended labels for a text using a J48 classifier trained on
 * the stored training set.
 * <p>
 * The training and labeling datasets are also written to
 * {@code ./data/dataTrainSet.arff} and {@code ./data/dataLabelSet.arff}.
 * Any failure is logged and the default {@code "-none-"} is returned.
 *
 * @param file the database handed to the vector and dataset builders
 * @param text the text to label
 * @return the recommended labels followed by the input text, or
 *         {@code "-none-"} if classification failed
 */
public static String recommendation(INSECTDB file, String text) {

    String labelList = "-none-";
    // create the similarity vector of the text against each class graph
    String iVector = vector.labellingVector(text, file);

    // scratch file holding the classifier output; removed again in the finally block
    File temp = null;
    try {

        Instances dataTrainSet = dataSets.trainingSet(file); // the training dataset
        Instances dataLabelSet = dataSets.labelingSet(file, iVector); // the dataset to label
        ArffSaver saver = new ArffSaver();
        saver.setInstances(dataTrainSet);
        saver.setFile(new File("./data/dataTrainSet.arff"));
        saver.writeBatch();

        ArffSaver saver2 = new ArffSaver();
        saver2.setInstances(dataLabelSet);
        saver2.setFile(new File("./data/dataLabelSet.arff"));
        saver2.writeBatch();

        temp = File.createTempFile("exportFile", null);

        // default to the last attribute as the class when none is set
        if (dataTrainSet.classIndex() == -1) {
            dataTrainSet.setClassIndex(dataTrainSet.numAttributes() - 1);
        }

        // Train a J48 decision tree.
        // TODO: make the classifier configurable (NaiveBayes, RandomForest and
        // LibSVM with "-S 2 -K 2 -D 3 -G 0.0 -R 0.0 -N 0.5 -M 40.0 -C 1.0 -E 0.001 -P 0.1"
        // were tried previously).
        J48 classifier = new J48();
        classifier.buildClassifier(dataTrainSet);

        if (dataLabelSet.classIndex() == -1) {
            dataLabelSet.setClassIndex(dataLabelSet.numAttributes() - 1);
        }

        // classify the labeling set and capture the plain-text predictions
        StringBuffer writer = new StringBuffer();

        PlainText output = new PlainText();
        output.setBuffer(writer);
        output.setHeader(dataLabelSet);
        output.printClassifications(classifier, dataLabelSet);

        // write the classification results to the temp file for result(File);
        // close the stream even if printing fails
        PrintStream ps = new PrintStream(temp);
        try {
            ps.print(writer.toString());
        } finally {
            ps.close();
        }

        // TODO: export result
        labelList = result(temp) + " --------->> " + text;
        Utilities.appendToFile(labelList);

    } catch (Exception ex) {
        Logger.getLogger(labelTagging.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // best-effort cleanup so temp files do not accumulate across calls
        if (temp != null && !temp.delete()) {
            temp.deleteOnExit();
        }
    }

    return labelList;
}