Example usage for weka.core Range Range

List of usage examples for weka.core Range Range

Introduction

On this page you can find example usages of the weka.core.Range constructor Range(String rangeList).

Prototype

public Range(String rangeList) 

Source Link

Document

Constructor to set initial range.

Usage

From source file:ExperimentDemo.java

License:Open Source License

/**
 * Expects the following parameters: /*w  w w  .j  a va  2 s . c  o m*/
 * <ul>
 *   <li>-classifier "classifier incl. parameters"</li>
 *   <li>-exptype "classification|regression"</li>
 *   <li>-splittype "crossvalidation|randomsplit"</li>
 *   <li>-runs "# of runs"</li>
 *   <li>-folds "# of cross-validation folds"</li>
 *   <li>-percentage "percentage for randomsplit"</li>
 *   <li>-result "arff file for storing the results"</li>
 *   <li>-t "dataset" (can be supplied multiple times)</li>
 * </ul>
 * 
 * @param args   the commandline arguments
 * @throws Exception   if something goes wrong
 */
public static void main(String[] args) throws Exception {
    // parameters provided?
    if (args.length == 0) {
        System.out.println("\nUsage: weka.examples.experiment.ExperimentDemo\n"
                + "\t   -classifier <classifier incl. parameters>\n"
                + "\t   -exptype <classification|regression>\n"
                + "\t   -splittype <crossvalidation|randomsplit>\n" + "\t   -runs <# of runs>\n"
                + "\t   -folds <folds for CV>\n" + "\t   -percentage <percentage for randomsplit>\n"
                + "\t   -result <ARFF file for storing the results>\n"
                + "\t   -t dataset (can be supplied multiple times)\n");
        System.exit(1);
    }

    // 1. setup the experiment
    System.out.println("Setting up...");
    Experiment exp = new Experiment();
    exp.setPropertyArray(new Classifier[0]);
    exp.setUsePropertyIterator(true);

    String option;

    // classification or regression
    option = Utils.getOption("exptype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No experiment type provided!");

    SplitEvaluator se = null;
    /*
     * Interface to objects able to generate a fixed set of results for a particular split of a dataset.
     * The set of results should contain fields related to any settings of the SplitEvaluator (not including the dataset name.
     * For example, one field for the classifier used to get the results, another for the classifier options, etc).
     * Possible implementations of SplitEvaluator: StdClassification results, StdRegression results.
     */
    Classifier sec = null;
    boolean classification = false;
    if (option.equals("classification")) {
        classification = true;
        se = new ClassifierSplitEvaluator();
        /*
         * A SplitEvaluator that produces results for a classification scheme on a nominal class attribute. 
         */
        sec = ((ClassifierSplitEvaluator) se).getClassifier();
    } else if (option.equals("regression")) {
        se = new RegressionSplitEvaluator();
        sec = ((RegressionSplitEvaluator) se).getClassifier();
    } else {
        throw new IllegalArgumentException("Unknown experiment type '" + option + "'!");
    }

    // crossvalidation or randomsplit
    option = Utils.getOption("splittype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No split type provided!");

    if (option.equals("crossvalidation")) {
        CrossValidationResultProducer cvrp = new CrossValidationResultProducer();
        /*
         * Generates for each run, carries out an n-fold cross-validation, using the set SplitEvaluator to generate some results.
         * If the class attribute is nominal, the dataset is stratified. Results for each fold are generated, so you may wish to use
         * this in addition with an AveragingResultProducer to obtain averages for each run. 
         */
        option = Utils.getOption("folds", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No folds provided!");
        cvrp.setNumFolds(Integer.parseInt(option));
        cvrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        /*
         * Stores information on a property of an object: the class of the object with the property;
         * the property descriptor, and the current value.
         */
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class),
                    CrossValidationResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(cvrp);
        exp.setPropertyPath(propertyPath);

    } else if (option.equals("randomsplit")) {
        RandomSplitResultProducer rsrp = new RandomSplitResultProducer();
        rsrp.setRandomizeData(true);
        option = Utils.getOption("percentage", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No percentage provided!");
        rsrp.setTrainPercent(Double.parseDouble(option));
        rsrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class),
                    RandomSplitResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(rsrp);
        exp.setPropertyPath(propertyPath);
    } else {
        throw new IllegalArgumentException("Unknown split type '" + option + "'!");
    }

    // runs
    option = Utils.getOption("runs", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No runs provided!");
    exp.setRunLower(1);
    exp.setRunUpper(Integer.parseInt(option));

    // classifier
    option = Utils.getOption("classifier", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No classifier provided!");
    String[] options = Utils.splitOptions(option);
    String classname = options[0];
    options[0] = "";
    Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options);
    exp.setPropertyArray(new Classifier[] { c });

    // datasets
    boolean data = false;
    DefaultListModel model = new DefaultListModel();
    do {
        option = Utils.getOption("t", args);
        if (option.length() > 0) {
            File file = new File(option);
            if (!file.exists())
                throw new IllegalArgumentException("File '" + option + "' does not exist!");
            data = true;
            model.addElement(file);
        }
    } while (option.length() > 0);
    if (!data)
        throw new IllegalArgumentException("No data files provided!");
    exp.setDatasets(model);

    // result
    option = Utils.getOption("result", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No result file provided!");
    InstancesResultListener irl = new InstancesResultListener();
    irl.setOutputFile(new File(option));
    exp.setResultListener(irl);

    // 2. run experiment
    System.out.println("Initializing...");
    exp.initialize();
    System.out.println("Running...");
    exp.runExperiment();
    System.out.println("Finishing...");
    exp.postProcess();

    // 3. calculate statistics and output them
    System.out.println("Evaluating...");
    PairedTTester tester = new PairedCorrectedTTester();
    /*
     * Calculates T-Test statistics on data stored in a set of instances. 
     */
    Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile())));
    tester.setInstances(result);
    tester.setSortColumn(-1);
    tester.setRunColumn(result.attribute("Key_Run").index());
    if (classification)
        tester.setFoldColumn(result.attribute("Key_Fold").index());
    tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1)));
    tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + ","
            + (result.attribute("Key_Scheme_options").index() + 1) + ","
            + (result.attribute("Key_Scheme_version_ID").index() + 1)));
    tester.setResultMatrix(new ResultMatrixPlainText());
    tester.setDisplayedResultsets(null);
    tester.setSignificanceLevel(0.05);
    tester.setShowStdDevs(true);
    // fill result matrix (but discarding the output)
    if (classification)
        tester.multiResultsetFull(0, result.attribute("Percent_correct").index());
    else
        tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index());
    // output results for reach dataset
    System.out.println("\nResult:");
    ResultMatrix matrix = tester.getResultMatrix();
    for (int i = 0; i < matrix.getColCount(); i++) {
        System.out.println(matrix.getColName(i));
        System.out.println("    Perc. correct: " + matrix.getMean(i, 0));
        System.out.println("    StdDev: " + matrix.getStdDev(i, 0));
    }
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Returns the distribution for an instance.
 * <p>
 * With a single base classifier the call is delegated directly. In
 * 1-against-1 mode every non-null pairwise classifier either casts one vote
 * for a class of its pair or, when pairwise coupling is enabled and there are
 * more than two classes, fills the matrices handed to
 * {@code pairwiseCoupling}. In all other modes the two outputs of each binary
 * classifier are added to every class, chosen by whether the class index lies
 * in the indicator filter's value range. If the accumulated scores do not sum
 * to more than zero, the result of {@code m_ZeroR} is returned instead.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance inst) throws Exception {

    // only one base classifier: nothing to combine
    if (m_Classifiers.length == 1) {
        return m_Classifiers[0].distributionForInstance(inst);
    }

    final int numClasses = inst.numClasses();
    double[] scores = new double[numClasses];

    if (m_Method == METHOD_1_AGAINST_1) {
        final boolean coupled = m_pairwiseCoupling && numClasses > 2;
        double[][] pairProbs = new double[numClasses][numClasses];
        double[][] pairWeights = new double[numClasses][numClasses];

        for (int k = 0; k < m_ClassFilters.length; k++) {
            if (m_Classifiers[k] == null) {
                continue;
            }
            // evaluate a copy attached to the two-class header
            Instance twoClassInst = (Instance) inst.copy();
            twoClassInst.setDataset(m_TwoClassDataset);
            double[] binary = m_Classifiers[k].distributionForInstance(twoClassInst);

            // recover which two class values this classifier separates
            RemoveWithValues pairFilter = (RemoveWithValues) m_ClassFilters[k];
            Range pairRange = new Range(pairFilter.getNominalIndices());
            pairRange.setUpper(m_ClassAttribute.numValues());
            int[] pair = pairRange.getSelection();

            if (coupled) {
                pairProbs[pair[0]][pair[1]] = binary[0];
                pairWeights[pair[0]][pair[1]] = m_SumOfWeights[k];
            } else {
                // plain voting: one vote for the more probable class of the pair
                int winner = (binary[0] > binary[1]) ? pair[0] : pair[1];
                scores[winner] += 1.0;
            }
        }
        if (coupled) {
            return pairwiseCoupling(pairWeights, pairProbs);
        }
    } else {
        // error correcting style methods
        for (int k = 0; k < m_ClassFilters.length; k++) {
            m_ClassFilters[k].input(inst);
            m_ClassFilters[k].batchFinished();
            double[] binary = m_Classifiers[k].distributionForInstance(m_ClassFilters[k].output());

            for (int cls = 0; cls < m_ClassAttribute.numValues(); cls++) {
                boolean indicated = ((MakeIndicator) m_ClassFilters[k]).getValueRange().isInRange(cls);
                scores[cls] += indicated ? binary[1] : binary[0];
            }
        }
    }

    // no usable evidence collected: fall back to ZeroR
    if (!Utils.gr(Utils.sum(scores), 0)) {
        return m_ZeroR.distributionForInstance(inst);
    }
    Utils.normalize(scores);
    return scores;
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Prints the classifiers.
 * <p>
 * Each base classifier is listed with its 1-based number. For a pairwise
 * ({@code RemoveWithValues}) filter the two 1-based class indices are shown,
 * for a {@code MakeIndicator} filter its value range. Missing classifiers are
 * reported as skipped.
 *
 * @return a string representation of the classifier
 */
public String toString() {

    if (m_Classifiers == null) {
        return "MultiClassClassifier: No model built yet.";
    }

    StringBuilder out = new StringBuilder("MultiClassClassifier\n\n");
    for (int idx = 0; idx < m_Classifiers.length; idx++) {
        out.append("Classifier ").append(idx + 1);

        if (m_Classifiers[idx] == null) {
            out.append(" Skipped (no training examples)\n");
            continue;
        }

        boolean hasFilter = (m_ClassFilters != null) && (m_ClassFilters[idx] != null);
        if (hasFilter) {
            if (m_ClassFilters[idx] instanceof RemoveWithValues) {
                RemoveWithValues pairFilter = (RemoveWithValues) m_ClassFilters[idx];
                Range pairRange = new Range(pairFilter.getNominalIndices());
                pairRange.setUpper(m_ClassAttribute.numValues());
                int[] pair = pairRange.getSelection();
                // selection is 0-based, the output is 1-based
                int first = pair[0] + 1;
                int second = pair[1] + 1;
                out.append(", ").append(first).append(" vs ").append(second);
            } else if (m_ClassFilters[idx] instanceof MakeIndicator) {
                out.append(", using indicator values: ");
                out.append(((MakeIndicator) m_ClassFilters[idx]).getValueRange());
            }
        }
        out.append('\n');
        out.append(m_Classifiers[idx].toString()).append("\n\n");
    }

    return out.toString();
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Set the value of m_SelectedRange./*  w  w  w .  j a v  a2  s.  co  m*/
 * 
 * @param newSelectedRange
 *            Value to assign to m_SelectedRange.
 */
public void setSelectedRange(String newSelectedRange) {
    m_SelectedRange = new Range(newSelectedRange);
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Determines the selected range.
 * <p>
 * If no range has been set, it defaults to all string attributes of the
 * input format. Afterwards the range is narrowed to those selected
 * attributes that are of string type.
 */
private void determineSelectedRange() {

    final Instances format = getInputFormat();
    final int attrCount = format.numAttributes();

    // Default selection: every string attribute (range strings are 1-based)
    if (m_SelectedRange == null) {
        StringBuilder defaults = new StringBuilder();
        for (int idx = 0; idx < attrCount; idx++) {
            if (format.attribute(idx).type() == Attribute.STRING) {
                defaults.append(idx + 1).append(',');
            }
        }
        m_SelectedRange = new Range(defaults.toString());
    }
    m_SelectedRange.setUpper(attrCount - 1);

    // Prevent the user from converting non-string fields
    StringBuilder stringOnly = new StringBuilder();
    for (int idx = 0; idx < attrCount; idx++) {
        if (!m_SelectedRange.isInRange(idx)) {
            continue;
        }
        if (format.attribute(idx).type() != Attribute.STRING) {
            continue;
        }
        stringOnly.append(idx + 1).append(',');
    }
    m_SelectedRange.setRanges(stringOnly.toString());
    m_SelectedRange.setUpper(attrCount - 1);

    // System.err.println("Selected Range: " +
    // getSelectedRange().getRanges());
}

From source file:com.emar.recsys.user.model.WekaExperiment.java

License:Open Source License

/**
 * Expects the following parameters:/*from w w w .j  a  va  2s . c  om*/
 * <ul>
 * <li>-classifier "classifier incl. parameters"</li>
 * <li>-exptype "classification|regression"</li>
 * <li>-splittype "crossvalidation|randomsplit"</li>
 * <li>-runs "# of runs"</li>
 * <li>-folds "# of cross-validation folds"</li>
 * <li>-percentage "percentage for randomsplit"</li>
 * <li>-result "arff file for storing the results"</li>
 * <li>-t "dataset" (can be supplied multiple times)</li>
 * </ul>
 * 
 * @param args
 *            the commandline arguments
 * @throws Exception
 *             if something goes wrong
 */
public static void main(String[] args) throws Exception {
    // parameters provided?
    if (args.length == 0) {
        System.out.println("\nUsage: ExperimentDemo\n" + "\t   -classifier <classifier incl. parameters>\n"
                + "\t   -exptype <classification|regression>\n"
                + "\t   -splittype <crossvalidation|randomsplit>\n" + "\t   -runs <# of runs>\n"
                + "\t   -folds <folds for CV>\n" + "\t   -percentage <percentage for randomsplit>\n"
                + "\t   -result <ARFF file for storing the results>\n"
                + "\t   -t dataset (can be supplied multiple times)\n");
        System.exit(1);
    }

    // 1. setup the experiment
    System.out.println("Setting up...");
    Experiment exp = new Experiment();
    exp.setPropertyArray(new Classifier[0]);
    exp.setUsePropertyIterator(true);

    String option;

    // classification or regression
    option = Utils.getOption("exptype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No experiment type provided!");

    SplitEvaluator se = null;
    Classifier sec = null;
    boolean classification = false;
    if (option.equals("classification")) {
        classification = true;
        se = new ClassifierSplitEvaluator();
        sec = ((ClassifierSplitEvaluator) se).getClassifier();
    } else if (option.equals("regression")) {
        se = new RegressionSplitEvaluator();
        sec = ((RegressionSplitEvaluator) se).getClassifier();
    } else {
        throw new IllegalArgumentException("Unknown experiment type '" + option + "'!");
    }

    // crossvalidation or randomsplit
    option = Utils.getOption("splittype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No split type provided!");

    if (option.equals("crossvalidation")) {
        CrossValidationResultProducer cvrp = new CrossValidationResultProducer();
        option = Utils.getOption("folds", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No folds provided!");
        cvrp.setNumFolds(Integer.parseInt(option));
        cvrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class),
                    CrossValidationResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(cvrp);
        exp.setPropertyPath(propertyPath);

    } else if (option.equals("randomsplit")) {
        RandomSplitResultProducer rsrp = new RandomSplitResultProducer();
        rsrp.setRandomizeData(true);
        option = Utils.getOption("percentage", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No percentage provided!");
        rsrp.setTrainPercent(Double.parseDouble(option));
        rsrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class),
                    RandomSplitResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(rsrp);
        exp.setPropertyPath(propertyPath);
    } else {
        throw new IllegalArgumentException("Unknown split type '" + option + "'!");
    }

    // runs
    option = Utils.getOption("runs", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No runs provided!");
    exp.setRunLower(1);
    exp.setRunUpper(Integer.parseInt(option));

    // classifier
    option = Utils.getOption("classifier", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No classifier provided!");
    String[] options = Utils.splitOptions(option);
    String classname = options[0];
    options[0] = "";
    Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options);
    exp.setPropertyArray(new Classifier[] { c });

    // datasets
    boolean data = false;
    DefaultListModel model = new DefaultListModel();
    do {
        option = Utils.getOption("t", args);
        if (option.length() > 0) {
            File file = new File(option);
            if (!file.exists())
                throw new IllegalArgumentException("File '" + option + "' does not exist!");
            data = true;
            model.addElement(file);
        }
    } while (option.length() > 0);
    if (!data)
        throw new IllegalArgumentException("No data files provided!");
    exp.setDatasets(model);

    // result
    option = Utils.getOption("result", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No result file provided!");
    InstancesResultListener irl = new InstancesResultListener();
    irl.setOutputFile(new File(option));
    exp.setResultListener(irl);

    // 2. run experiment
    System.out.println("Initializing...");
    exp.initialize();
    System.out.println("Running...");
    exp.runExperiment();
    System.out.println("Finishing...");
    exp.postProcess();

    // 3. calculate statistics and output them
    System.out.println("Evaluating...");
    PairedTTester tester = new PairedCorrectedTTester();
    Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile())));
    tester.setInstances(result);
    tester.setSortColumn(-1);
    tester.setRunColumn(result.attribute("Key_Run").index());
    if (classification)
        tester.setFoldColumn(result.attribute("Key_Fold").index());
    tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1)));
    tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + ","
            + (result.attribute("Key_Scheme_options").index() + 1) + ","
            + (result.attribute("Key_Scheme_version_ID").index() + 1)));
    tester.setResultMatrix(new ResultMatrixPlainText());
    tester.setDisplayedResultsets(null);
    tester.setSignificanceLevel(0.05);
    tester.setShowStdDevs(true);
    // fill result matrix (but discarding the output)
    if (classification)
        tester.multiResultsetFull(0, result.attribute("Percent_correct").index());
    else
        tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index());
    // output results for reach dataset
    System.out.println("\nResult:");
    ResultMatrix matrix = tester.getResultMatrix();
    for (int i = 0; i < matrix.getColCount(); i++) {
        System.out.println(matrix.getColName(i));
        System.out.println("    Perc. correct: " + matrix.getMean(i, 0));
        System.out.println("    StdDev: " + matrix.getStdDev(i, 0));
    }
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Evaluates a clusterer with the options given in an array of
 * strings. It takes the string indicated by "-t" as training file, the
 * string indicated by "-T" as test file.
 * If the test file is missing, a stratified ten-fold
 * cross-validation is performed (distribution clusterers only).
 * Using "-x" you can change the number of
 * folds to be used, and using "-s" the random seed.
 * If the "-p" option is present it outputs the classification for
 * each test instance. If you provide the name of an object file using
 * "-l", a clusterer will be loaded from the given file. If you provide the
 * name of an object file using "-d", the clusterer built from the
 * training data will be saved to the given file.
 *
 * @param clusterer machine learning clusterer
 * @param options the array of string containing the options
 * @throws Exception if model could not be evaluated successfully
 * @return a string describing the results 
 */
public static String evaluateClusterer(Clusterer clusterer, String[] options) throws Exception {

    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    Random random;
    String trainFileName, testFileName, seedString, foldsString;
    String objectInputFileName, objectOutputFileName, attributeRangeString;
    String graphFileName;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering
    boolean updateable = (clusterer instanceof UpdateableClusterer);
    DataSource source = null;
    Instance inst;

    if (Utils.getFlag('h', options) || Utils.getFlag("help", options)) {

        // global info requested as well?
        boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);

        throw new Exception("Help requested." + makeOptionString(clusterer, globalInfo));
    }

    try {
        // Get basic options (options the same for all clusterers
        //printClusterAssignments = Utils.getFlag('p', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        trainFileName = Utils.getOption('t', options);
        testFileName = Utils.getOption('T', options);
        graphFileName = Utils.getOption('g', options);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClusterAssignments = true;
            if (!attributeRangeString.equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }

            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test file given.");
            }
        } else {
            if ((objectInputFileName.length() != 0) && (printClusterAssignments == false)) {
                throw new Exception("Can't use both train and model file " + "unless -p specified.");
            }
        }

        seedString = Utils.getOption('s', options);

        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }

        foldsString = Utils.getOption('x', options);

        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
            doXval = true;
        }
    } catch (Exception e) {
        throw new Exception('\n' + e.getMessage() + makeOptionString(clusterer, false));
    }

    try {
        if (trainFileName.length() != 0) {
            source = new DataSource(trainFileName);
            train = source.getStructure();

            String classString = Utils.getOption('c', options);
            if (classString.length() != 0) {
                if (classString.compareTo("last") == 0)
                    theClass = train.numAttributes();
                else if (classString.compareTo("first") == 0)
                    theClass = 1;
                else
                    theClass = Integer.parseInt(classString);

                if (theClass != -1) {
                    if (doXval || testFileName.length() != 0)
                        throw new Exception("Can only do class based evaluation on the " + "training data");

                    if (objectInputFileName.length() != 0)
                        throw new Exception("Can't load a clusterer and do class based " + "evaluation");

                    if (objectOutputFileName.length() != 0)
                        throw new Exception("Can't do class based evaluation and save clusterer");
                }
            } else {
                // if the dataset defines a class attribute, use it
                if (train.classIndex() != -1) {
                    theClass = train.classIndex() + 1;
                    System.err
                            .println("Note: using class attribute from dataset, i.e., attribute #" + theClass);
                }
            }

            if (theClass != -1) {
                if (theClass < 1 || theClass > train.numAttributes())
                    throw new Exception("Class is out of range!");

                if (!train.attribute(theClass - 1).isNominal())
                    throw new Exception("Class must be nominal!");

                train.setClassIndex(theClass - 1);
            }
        }
    } catch (Exception e) {
        throw new Exception("ClusterEvaluation: " + e.getMessage() + '.');
    }

    // Save options
    if (options != null) {
        savedOptions = new String[options.length];
        System.arraycopy(options, 0, savedOptions, 0, options.length);
    }

    if (objectInputFileName.length() != 0)
        Utils.checkForRemainingOptions(options);

    // Set options for clusterer
    if (clusterer instanceof OptionHandler)
        ((OptionHandler) clusterer).setOptions(options);

    Utils.checkForRemainingOptions(options);

    Instances trainHeader = train;
    if (objectInputFileName.length() != 0) {
        // Load the clusterer from file
        //      clusterer = (Clusterer) SerializationHelper.read(objectInputFileName);
        java.io.ObjectInputStream ois = new java.io.ObjectInputStream(
                new java.io.BufferedInputStream(new java.io.FileInputStream(objectInputFileName)));
        clusterer = (Clusterer) ois.readObject();
        // try and get the training header
        try {
            trainHeader = (Instances) ois.readObject();
        } catch (Exception ex) {
            // don't moan if we cant
        }
    } else {
        // Build the clusterer if no object file provided
        if (theClass == -1) {
            if (updateable) {
                clusterer.buildClusterer(source.getStructure());
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    ((UpdateableClusterer) clusterer).updateClusterer(inst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                clusterer.buildClusterer(source.getDataSet());
            }
        } else {
            Remove removeClass = new Remove();
            removeClass.setAttributeIndices("" + theClass);
            removeClass.setInvertSelection(false);
            removeClass.setInputFormat(train);
            if (updateable) {
                Instances clusterTrain = Filter.useFilter(train, removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
                while (source.hasMoreElements(train)) {
                    inst = source.nextElement(train);
                    removeClass.input(inst);
                    removeClass.batchFinished();
                    Instance clusterTrainInst = removeClass.output();
                    ((UpdateableClusterer) clusterer).updateClusterer(clusterTrainInst);
                }
                ((UpdateableClusterer) clusterer).updateFinished();
            } else {
                Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
                clusterer.buildClusterer(clusterTrain);
                trainHeader = clusterTrain;
            }
            ClusterEvaluationEX ce = new ClusterEvaluationEX();
            ce.setClusterer(clusterer);
            ce.evaluateClusterer(train, trainFileName);

            return "\n\n=== Clustering stats for training data ===\n\n" + ce.clusterResultsToString();
        }
    }

    /* Output cluster predictions only (for the test data if specified,
       otherwise for the training data */
    if (printClusterAssignments) {
        return printClusterings(clusterer, trainFileName, testFileName, attributesToOutput);
    }

    text.append(clusterer.toString());
    text.append(
            "\n\n=== Clustering stats for training data ===\n\n" + printClusterStats(clusterer, trainFileName));

    if (testFileName.length() != 0) {
        // check header compatibility
        DataSource test = new DataSource(testFileName);
        Instances testStructure = test.getStructure();
        if (!trainHeader.equalHeaders(testStructure)) {
            throw new Exception("Training and testing data are not compatible\n");
        }

        text.append("\n\n=== Clustering stats for testing data ===\n\n"
                + printClusterStats(clusterer, testFileName));
    }

    if ((clusterer instanceof DensityBasedClusterer) && (doXval == true) && (testFileName.length() == 0)
            && (objectInputFileName.length() == 0)) {
        // cross validate the log likelihood on the training data
        random = new Random(seed);
        random.setSeed(seed);
        train = source.getDataSet();
        train.randomize(random);
        text.append(crossValidateModel(clusterer.getClass().getName(), train, folds, savedOptions, random));
    }

    // Save the clusterer if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        //SerializationHelper.write(objectOutputFileName, clusterer);
        saveClusterer(objectOutputFileName, clusterer, trainHeader);
    }

    // If classifier is drawable output string describing graph
    if ((clusterer instanceof Drawable) && (graphFileName.length() != 0)) {
        BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName));
        writer.write(((Drawable) clusterer).graph());
        writer.newLine();
        writer.flush();
        writer.close();
    }

    return text.toString();
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Evaluates a classifier with the options given in an array of
 * strings. <p/>
 *
 * Valid options are: <p/>
 *
 * -t name of training file <br/>
 * Name of the file with the training data. (required) <p/>
 *
 * -T name of test file <br/>
 * Name of the file with the test data. If missing a cross-validation 
 * is performed. <p/>
 *
 * -c class index <br/>
 * Index of the class attribute (1, 2, ...; default: last). <p/>
 *
 * -x number of folds <br/>
 * The number of folds for the cross-validation (default: 10). <p/>
 *
 * -no-cv <br/>
 * No cross validation.  If no test file is provided, no evaluation
 * is done. <p/>
 * 
 * -split-percentage percentage <br/>
 * Sets the percentage for the train/test set split, e.g., 66. <p/>
 * 
 * -preserve-order <br/>
 * Preserves the order in the percentage split instead of randomizing
 * the data first with the seed value ('-s'). <p/>
 *
 * -s seed <br/>
 * Random number seed for the cross-validation and percentage split
 * (default: 1). <p/>
 *
 * -m file with cost matrix <br/>
 * The name of a file containing a cost matrix. <p/>
 *
 * -l filename <br/>
 * Loads classifier from the given file. In case the filename ends with
 * ".xml", the options are loaded from XML (PMML loading is disabled in this copy). <p/>
 *
 * -d filename <br/>
 * Saves classifier built from the training data into the given file. In case
 * the filename ends with ".xml" the options are saved as XML, not the model. <p/>
 *
 * -v <br/>
 * Outputs no statistics for the training data. <p/>
 *
 * -o <br/>
 * Outputs statistics only, not the classifier. <p/>
 * 
 * -i <br/>
 * Outputs detailed information-retrieval statistics per class. <p/>
 *
 * -k <br/>
 * Outputs information-theoretic statistics. <p/>
 *
 * -p range <br/>
 * Outputs predictions for test instances (or the train instances if no test
 * instances provided and -no-cv is used), along with the attributes in the specified range 
 * (and nothing else). Use '-p 0' if no attributes are desired. <p/>
 *
 * -distribution <br/>
 * Outputs the distribution instead of only the prediction
 * in conjunction with the '-p' option (only nominal classes). <p/>
 *
 * -r <br/>
 * Outputs cumulative margin distribution (and nothing else). <p/>
 *
 * -g <br/> 
 * Only for classifiers that implement "Drawable". Outputs
 * the graph representation of the classifier (and nothing
 * else). <p/>
 *
 * -xml filename | xml-string <br/>
 * Retrieves the options from the XML-data instead of the command line. <p/>
 *
 * @param classifier machine learning classifier
 * @param options the array of string containing the options
 * @throws Exception if model could not be evaluated successfully
 * @return a string describing the results 
 */
public static String evaluateModel(Classifier classifier, String[] options) throws Exception {

    Instances train = null, tempTrain, test = null, template = null;
    // classIndex is 1-based as given on the command line; -1 selects the last attribute
    int seed = 1, folds = 10, classIndex = -1;
    boolean noCrossValidation = false;
    String trainFileName, testFileName, sourceClass, classIndexString, seedString, foldsString,
            objectInputFileName, objectOutputFileName, attributeRangeString;
    boolean noOutput = false, printClassifications = false, trainStatistics = true, printMargins = false,
            printComplexityStatistics = false, printGraph = false, classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    DataSource trainSource = null, testSource = null;
    ObjectInputStream objectInputStream = null;
    BufferedInputStream xmlInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;
    String xml = "";
    Classifier classifierBackup;
    Classifier classifierClassifications = null;
    boolean printDistribution = false;
    int actualClassIndex = -1; // 0-based class index
    String splitPercentageString = "";
    int splitPercentage = -1;
    boolean preserveOrder = false;
    boolean trainSetPresent = false;
    boolean testSetPresent = false;
    String thresholdFile;
    String thresholdLabel;
    StringBuffer predsBuff = null; // predictions from cross-validation

    // help requested?
    if (Utils.getFlag("h", options) || Utils.getFlag("help", options)) {

        // global info requested as well?
        boolean globalInfo = Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);

        throw new Exception("\nHelp requested." + makeOptionString(classifier, globalInfo));
    }

    try {
        // do we get the input from XML instead of normal parameters?
        xml = Utils.getOption("xml", options);
        if (!xml.equals(""))
            options = new XMLOptions(xml).toArray();

        // is the input model only the XML-Options, i.e. w/o built model?
        // Peek at '-l' on a copy, because Utils.getOption erases what it reads.
        String[] optionsTmp = options.clone();
        String tmpO = Utils.getOption('l', optionsTmp);
        if (tmpO.endsWith(".xml")) {
            // PMML loading is disabled in this copy, so an ".xml" model file is always
            // read as serialized options ('-l' is automatically erased from options!)
            XMLClassifier xmlserial = new XMLClassifier();
            Classifier cl = (Classifier) xmlserial.read(Utils.getOption('l', options));

            // merge options: the ones stored in the file come first
            String[] storedOptions = cl.getOptions();
            optionsTmp = new String[options.length + storedOptions.length];
            System.arraycopy(storedOptions, 0, optionsTmp, 0, storedOptions.length);
            System.arraycopy(options, 0, optionsTmp, storedOptions.length, options.length);
            options = optionsTmp;
        }

        noCrossValidation = Utils.getFlag("no-cv", options);
        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.length() != 0) {
            if (classIndexString.equals("first"))
                classIndex = 1;
            else if (classIndexString.equals("last"))
                classIndex = -1;
            else
                classIndex = Integer.parseInt(classIndexString);
        }
        trainFileName = Utils.getOption('t', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
        }
        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }
        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test " + "file given.");
            }
        } else if ((objectInputFileName.length() != 0)
                && ((!(classifier instanceof UpdateableClassifier)) || (testFileName.length() == 0))) {
            throw new Exception("Classifier not incremental, or no " + "test file provided: can't "
                    + "use both train and model file.");
        }
        try {
            if (trainFileName.length() != 0) {
                trainSetPresent = true;
                trainSource = new DataSource(trainFileName);
            }
            if (testFileName.length() != 0) {
                testSetPresent = true;
                testSource = new DataSource(testFileName);
            }
            if (objectInputFileName.length() != 0) {
                if (objectInputFileName.endsWith(".xml")) {
                    // if this is the case then it means that a PMML classifier was
                    // successfully loaded earlier in the code
                    objectInputStream = null;
                    xmlInputStream = null;
                } else {
                    InputStream is = new FileInputStream(objectInputFileName);
                    if (objectInputFileName.endsWith(".gz")) {
                        is = new GZIPInputStream(is);
                    }
                    // load from KOML?
                    if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent())) {
                        objectInputStream = new ObjectInputStream(is);
                        xmlInputStream = null;
                    } else {
                        objectInputStream = null;
                        xmlInputStream = new BufferedInputStream(is);
                    }
                }
            }
        } catch (Exception e) {
            // keep the cause so the underlying I/O failure stays visible in the stack trace
            throw new Exception("Can't open file " + e.getMessage() + '.', e);
        }
        if (testSetPresent) {
            template = test = testSource.getStructure();
            if (classIndex != -1) {
                test.setClassIndex(classIndex - 1);
            } else {
                if ((test.classIndex() == -1) || (classIndexString.length() != 0))
                    test.setClassIndex(test.numAttributes() - 1);
            }
            actualClassIndex = test.classIndex();
        } else {
            // percentage split
            splitPercentageString = Utils.getOption("split-percentage", options);
            if (splitPercentageString.length() != 0) {
                if (foldsString.length() != 0)
                    throw new Exception("Percentage split cannot be used in conjunction with "
                            + "cross-validation ('-x').");
                splitPercentage = Integer.parseInt(splitPercentageString);
                if ((splitPercentage <= 0) || (splitPercentage >= 100))
                    throw new Exception("Percentage split value needs be >0 and <100.");
            } else {
                splitPercentage = -1;
            }
            preserveOrder = Utils.getFlag("preserve-order", options);
            if (preserveOrder) {
                if (splitPercentage == -1)
                    // name the option exactly as it is parsed above
                    throw new Exception("Percentage split ('-split-percentage') is missing.");
            }
            // create new train/test sources
            if (splitPercentage > 0) {
                testSetPresent = true;
                Instances tmpInst = trainSource.getDataSet(actualClassIndex);
                if (!preserveOrder)
                    tmpInst.randomize(new Random(seed));
                // the first splitPercentage% of the instances train, the remainder tests
                int trainSize = tmpInst.numInstances() * splitPercentage / 100;
                int testSize = tmpInst.numInstances() - trainSize;
                Instances trainInst = new Instances(tmpInst, 0, trainSize);
                Instances testInst = new Instances(tmpInst, trainSize, testSize);
                trainSource = new DataSource(trainInst);
                testSource = new DataSource(testInst);
                template = test = testSource.getStructure();
                if (classIndex != -1) {
                    test.setClassIndex(classIndex - 1);
                } else {
                    if ((test.classIndex() == -1) || (classIndexString.length() != 0))
                        test.setClassIndex(test.numAttributes() - 1);
                }
                actualClassIndex = test.classIndex();
            }
        }
        if (trainSetPresent) {
            template = train = trainSource.getStructure();
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                if ((train.classIndex() == -1) || (classIndexString.length() != 0))
                    train.setClassIndex(train.numAttributes() - 1);
            }
            actualClassIndex = train.classIndex();
            if ((testSetPresent) && !test.equalHeaders(train)) {
                throw new IllegalArgumentException("Train and test file not compatible!");
            }
        }
        if (template == null) {
            throw new Exception("No actual dataset provided to use as template");
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());

        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        // '-z <class name>' requests the classifier as source code (see printSource below)
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.length() != 0);
        printDistribution = Utils.getFlag("distribution", options);
        thresholdFile = Utils.getOption("threshold-file", options);
        thresholdLabel = Utils.getOption("threshold-label", options);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClassifications = true;
            noOutput = true;
            // "0" means predictions only; attributesToOutput then stays null
            if (!attributeRangeString.equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        if (!printClassifications && printDistribution)
            throw new Exception("Cannot print distribution without '-p' option!");

        // if no training file given, we don't have any priors
        if ((!trainSetPresent) && (printComplexityStatistics))
            throw new Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");

        // If a model file is given, we can't process scheme-specific options,
        // so they are only handed to the classifier when it is built from scratch
        if ((objectInputFileName.length() == 0) && (classifier instanceof OptionHandler)) {
            // remember the scheme options (quoted where they contain blanks) for the report
            for (int i = 0; i < options.length; i++) {
                if (options[i].length() != 0) {
                    if (schemeOptionsText == null) {
                        schemeOptionsText = new StringBuffer();
                    }
                    if (options[i].indexOf(' ') != -1) {
                        schemeOptionsText.append('"' + options[i] + "\" ");
                    } else {
                        schemeOptionsText.append(options[i] + " ");
                    }
                }
            }
            ((OptionHandler) classifier).setOptions(options);
        }
        // whatever is left over at this point was not understood by anybody
        Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
        // Don't leak a model stream that was opened before the options turned out to be invalid
        if (objectInputStream != null) {
            try {
                objectInputStream.close();
            } catch (Exception ignored) {
                // the option failure is the error worth reporting
            }
        }
        if (xmlInputStream != null) {
            try {
                xmlInputStream.close();
            } catch (Exception ignored) {
                // the option failure is the error worth reporting
            }
        }
        throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier, false), e);
    }

    // Setup up evaluation objects
    Evaluation_D trainingEvaluation = new Evaluation_D(new Instances(template, 0), costMatrix);
    Evaluation_D testingEvaluation = new Evaluation_D(new Instances(template, 0), costMatrix);

    // disable use of priors if no training file given
    if (!trainSetPresent)
        testingEvaluation.useNoPriors();

    if (objectInputFileName.length() != 0) {
        // Load classifier from file
        // NOTE(review): this uses Java-native deserialization on a user-supplied file;
        // only load model files from trusted sources.
        if (objectInputStream != null) {
            try {
                classifier = (Classifier) objectInputStream.readObject();
                // try and read a header (if present)
                Instances savedStructure = null;
                try {
                    savedStructure = (Instances) objectInputStream.readObject();
                } catch (Exception ignored) {
                    // don't make a fuss: without a header the compatibility check is skipped
                }
                // test for compatibility with template
                if ((savedStructure != null) && !template.equalHeaders(savedStructure)) {
                    throw new Exception("training and test set are not compatible");
                }
            } finally {
                // close the stream even when loading or the header check fails
                objectInputStream.close();
            }
        } else if (xmlInputStream != null) {
            // whether KOML is available has already been checked (objectInputStream would null otherwise)!
            try {
                classifier = (Classifier) KOML.read(xmlInputStream);
            } finally {
                xmlInputStream.close();
            }
        }
    }

    // backup of fully setup classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) && (testSetPresent || noCrossValidation)
            && (costMatrix == null) && (trainSetPresent)) {
        // Build classifier incrementally
        trainingEvaluation.setPriors(train);
        testingEvaluation.setPriors(train);
        trainTimeStart = System.currentTimeMillis();
        if (objectInputFileName.length() == 0) {
            // a classifier loaded from a model file is only updated, not rebuilt
            classifier.buildClassifier(train);
        }
        Instance trainInst;
        while (trainSource.hasMoreElements(train)) {
            trainInst = trainSource.nextElement(train);
            trainingEvaluation.updatePriors(trainInst);
            testingEvaluation.updatePriors(trainInst);
            ((UpdateableClassifier) classifier).updateClassifier(trainInst);
        }
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    } else if (objectInputFileName.length() == 0) {
        // Build classifier in one go
        tempTrain = trainSource.getDataSet(actualClassIndex);
        trainingEvaluation.setPriors(tempTrain);
        testingEvaluation.setPriors(tempTrain);
        trainTimeStart = System.currentTimeMillis();
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // backup of fully trained classifier for printing the classifications
    if (printClassifications)
        classifierClassifications = Classifier.makeCopy(classifier);

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        OutputStream os = new FileOutputStream(objectOutputFileName);
        try {
            // binary
            if (!(objectOutputFileName.endsWith(".xml")
                    || (objectOutputFileName.endsWith(".koml") && KOML.isPresent()))) {
                if (objectOutputFileName.endsWith(".gz")) {
                    os = new GZIPOutputStream(os);
                }
                ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
                objectOutputStream.writeObject(classifier);
                // the header is stored after the model so it can be checked when loading
                if (template != null) {
                    objectOutputStream.writeObject(template);
                }
                objectOutputStream.flush();
                objectOutputStream.close();
            }
            // KOML/XML
            else {
                BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);
                if (objectOutputFileName.endsWith(".xml")) {
                    XMLSerialization xmlSerial = new XMLClassifier();
                    xmlSerial.write(xmlOutputStream, classifier);
                } else
                // whether KOML is present has already been checked
                // if not present -> ".koml" is interpreted as binary - see above
                if (objectOutputFileName.endsWith(".koml")) {
                    KOML.write(xmlOutputStream, classifier);
                }
                xmlOutputStream.close();
            }
        } finally {
            // releases the file handle when serialization fails; harmless after a normal close
            os.close();
        }
    }

    // If classifier is drawable output string describing graph
    if ((classifier instanceof Drawable) && (printGraph)) {
        return ((Drawable) classifier).graph();
    }

    // Output the classifier as equivalent source
    if ((classifier instanceof Sourcable) && (printSource)) {
        return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }

    // Output model
    if (!(noOutput || printMargins)) {
        if (classifier instanceof OptionHandler) {
            if (schemeOptionsText != null) {
                text.append("\nOptions: " + schemeOptionsText);
                text.append("\n");
            }
        }
        text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
        text.append("\n=== Evaluation Cost Matrix ===\n\n");
        text.append(costMatrix.toString());
    }

    // Output test instance predictions only
    if (printClassifications) {
        DataSource source = testSource;
        predsBuff = new StringBuffer();
        // no test set -> use train set
        if (source == null && noCrossValidation) {
            source = trainSource;
            predsBuff.append("\n=== Predictions on training data ===\n\n");
        } else {
            predsBuff.append("\n=== Predictions on test data ===\n\n");
        }
        if (source != null) {
            // predictions are collected in predsBuff and appended to the report at the end
            printClassifications(classifierClassifications, new Instances(template, 0), source,
                    actualClassIndex + 1, attributesToOutput, printDistribution, predsBuff);
        }
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainSetPresent)) {

        if ((classifier instanceof UpdateableClassifier) && (testSetPresent) && (costMatrix == null)) {

            // Classifier was trained incrementally, so we have to 
            // reset the source.
            trainSource.reset();

            // Incremental testing
            train = trainSource.getStructure(actualClassIndex);
            testTimeStart = System.currentTimeMillis();
            Instance trainInst;
            while (trainSource.hasMoreElements(train)) {
                trainInst = trainSource.nextElement(train);
                trainingEvaluation.evaluateModelOnce(classifier, trainInst);
            }
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
        } else {
            testTimeStart = System.currentTimeMillis();
            trainingEvaluation.evaluateModel(classifier, trainSource.getDataSet(actualClassIndex));
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
        }

        // Print the results of the training evaluation
        if (printMargins) {
            return trainingEvaluation.toCumulativeMarginDistributionString();
        } else {
            if (!printClassifications) {
                text.append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2)
                        + " seconds");

                if (splitPercentage > 0)
                    text.append("\nTime taken to test model on training split: ");
                else
                    text.append("\nTime taken to test model on training data: ");
                text.append(Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");

                if (splitPercentage > 0)
                    text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " split ===\n",
                            printComplexityStatistics));
                else
                    text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n",
                            printComplexityStatistics));

                if (template.classAttribute().isNominal()) {
                    if (classStatistics) {
                        text.append("\n\n" + trainingEvaluation.toClassDetailsString());
                    }
                    if (!noCrossValidation)
                        text.append("\n\n" + trainingEvaluation.toMatrixString());
                }
            }
        }
    }

    // Compute proper error estimates
    if (testSource != null) {
        // Testing is on the supplied test data
        testSource.reset();
        test = testSource.getStructure(test.classIndex());
        Instance testInst;
        while (testSource.hasMoreElements(test)) {
            testInst = testSource.nextElement(test);
            testingEvaluation.evaluateModelOnceAndRecordPrediction(classifier, testInst);
        }

        if (splitPercentage > 0) {
            if (!printClassifications) {
                text.append("\n\n" + testingEvaluation.toSummaryString("=== Error on test split ===\n",
                        printComplexityStatistics));
            }
        } else {
            if (!printClassifications) {
                text.append("\n\n" + testingEvaluation.toSummaryString("=== Error on test data ===\n",
                        printComplexityStatistics));
            }
        }

    } else if (trainSource != null) {
        if (!noCrossValidation) {
            // Testing is via cross-validation on training data
            Random random = new Random(seed);
            // use untrained (!) classifier for cross-validation
            classifier = Classifier.makeCopy(classifierBackup);
            if (!printClassifications) {
                testingEvaluation.crossValidateModel(classifier, trainSource.getDataSet(actualClassIndex),
                        folds, random);
                if (template.classAttribute().isNumeric()) {
                    text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
                            printComplexityStatistics));
                } else {
                    text.append("\n\n\n" + testingEvaluation.toSummaryString(
                            "=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
                }
            } else {
                // with '-p' only the per-instance predictions are reported, no summary
                predsBuff = new StringBuffer();
                predsBuff.append("\n=== Predictions under cross-validation ===\n\n");
                testingEvaluation.crossValidateModel(classifier, trainSource.getDataSet(actualClassIndex),
                        folds, random, predsBuff, attributesToOutput, Boolean.valueOf(printDistribution));
            }
        }
    }
    if (template.classAttribute().isNominal()) {
        if (classStatistics && !noCrossValidation && !printClassifications) {
            text.append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        if (!noCrossValidation && !printClassifications)
            text.append("\n\n" + testingEvaluation.toMatrixString());

    }

    // predictions from cross-validation?
    if (predsBuff != null) {
        text.append("\n" + predsBuff);
    }

    // Write threshold curve data for one class label (the first label unless one is named)
    if ((thresholdFile.length() != 0) && template.classAttribute().isNominal()) {
        int labelIndex = 0;
        if (thresholdLabel.length() != 0)
            labelIndex = template.classAttribute().indexOfValue(thresholdLabel);
        if (labelIndex == -1)
            throw new IllegalArgumentException("Class label '" + thresholdLabel + "' is unknown!");
        ThresholdCurve tc = new ThresholdCurve();
        Instances result = tc.getCurve(testingEvaluation.predictions(), labelIndex);
        DataSink.write(thresholdFile, result);
    }

    return text.toString();
}

From source file:examples.ExperimentDemo.java

License:Open Source License

/**
 * Expects the following parameters:
 * <ul>
 * <li>-classifier "classifier incl. parameters"</li>
 * <li>-exptype "classification|regression"</li>
 * <li>-splittype "crossvalidation|randomsplit"</li>
 * <li>-runs "# of runs"</li>
 * <li>-folds "# of cross-validation folds"</li>
 * <li>-percentage "percentage for randomsplit"</li>
 * <li>-result "arff file for storing the results"</li>
 * <li>-t "dataset" (can be supplied multiple times)</li>
 * </ul>
 * 
 * @param args
 *            the commandline arguments
 * @throws Exception
 *             if something goes wrong
 */

// ref: http://weka.wikispaces.com/Using+the+Experiment+API
public static void main(String[] args) throws Exception {

    // @xr: my modification of args, output to download folder
    // @xr: direct args not working, has to put paras in run-configuration-paras
    //      String[] args = { 
    //            "weka.classifiers.trees.J48", 
    //            "classification",
    //            "crossvalidation", 
    //            "10", 
    //            "10",
    //            "/Users/renxin/Downloads/output.arff", 
    //            "vote.arff", 
    //            "iris.arff" };

    //      String[] args = {
    //               "-classifier weka.classifiers.trees.J48",
    //               "-exptype classification",
    //               "-splittype crossvalidation",
    //               "-runs 10",
    //               "-folds 10",
    //               "-result /some/where/results.arff",
    //               "-t vote.arff",
    //               "-t iris.arff"
    //      };

    // parameters provided?
    if (args.length == 0) {
        System.out.println("\nUsage: ExperimentDemo\n" + "\t   -classifier <classifier incl. parameters>\n"
                + "\t   -exptype <classification|regression>\n"
                + "\t   -splittype <crossvalidation|randomsplit>\n" + "\t   -runs <# of runs>\n"
                + "\t   -folds <folds for CV>\n" + "\t   -percentage <percentage for randomsplit>\n"
                + "\t   -result <ARFF file for storing the results>\n"
                + "\t   -t dataset (can be supplied multiple times)\n");
        System.exit(1);
    }

    // 1. setup the experiment
    System.out.println("Setting up...");
    Experiment exp = new Experiment();
    exp.setPropertyArray(new Classifier[0]);
    exp.setUsePropertyIterator(true);

    String option;

    // classification or regression
    option = Utils.getOption("exptype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No experiment type provided!");

    SplitEvaluator se = null;
    Classifier sec = null;
    boolean classification = false;
    if (option.equals("classification")) {
        classification = true;
        se = new ClassifierSplitEvaluator();
        sec = ((ClassifierSplitEvaluator) se).getClassifier();
    } else if (option.equals("regression")) {
        se = new RegressionSplitEvaluator();
        sec = ((RegressionSplitEvaluator) se).getClassifier();
    } else {
        throw new IllegalArgumentException("Unknown experiment type '" + option + "'!");
    }

    // crossvalidation or randomsplit
    option = Utils.getOption("splittype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No split type provided!");

    if (option.equals("crossvalidation")) {
        CrossValidationResultProducer cvrp = new CrossValidationResultProducer();
        option = Utils.getOption("folds", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No folds provided!");
        cvrp.setNumFolds(Integer.parseInt(option));
        cvrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class),
                    CrossValidationResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(cvrp);
        exp.setPropertyPath(propertyPath);

    } else if (option.equals("randomsplit")) {
        RandomSplitResultProducer rsrp = new RandomSplitResultProducer();
        rsrp.setRandomizeData(true);
        option = Utils.getOption("percentage", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No percentage provided!");
        rsrp.setTrainPercent(Double.parseDouble(option));
        rsrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class),
                    RandomSplitResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(rsrp);
        exp.setPropertyPath(propertyPath);
    } else {
        throw new IllegalArgumentException("Unknown split type '" + option + "'!");
    }

    // runs
    option = Utils.getOption("runs", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No runs provided!");
    exp.setRunLower(1);
    exp.setRunUpper(Integer.parseInt(option));

    // classifier
    option = Utils.getOption("classifier", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No classifier provided!");
    String[] options = Utils.splitOptions(option);
    String classname = options[0];
    options[0] = "";
    Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options);
    exp.setPropertyArray(new Classifier[] { c });

    // datasets
    boolean data = false;
    DefaultListModel model = new DefaultListModel();
    do {
        option = Utils.getOption("t", args);
        if (option.length() > 0) {
            File file = new File(option);
            if (!file.exists())
                throw new IllegalArgumentException("File '" + option + "' does not exist!");
            data = true;
            model.addElement(file);
        }
    } while (option.length() > 0);
    if (!data)
        throw new IllegalArgumentException("No data files provided!");
    exp.setDatasets(model);

    // result
    option = Utils.getOption("result", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No result file provided!");
    InstancesResultListener irl = new InstancesResultListener();
    irl.setOutputFile(new File(option));
    exp.setResultListener(irl);

    // 2. run experiment
    System.out.println("Initializing...");
    exp.initialize();
    System.out.println("Running...");
    exp.runExperiment();
    System.out.println("Finishing...");
    exp.postProcess();

    // 3. calculate statistics and output them
    System.out.println("Evaluating...");
    PairedTTester tester = new PairedCorrectedTTester();
    Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile())));
    tester.setInstances(result);
    tester.setSortColumn(-1);
    tester.setRunColumn(result.attribute("Key_Run").index());
    if (classification)
        tester.setFoldColumn(result.attribute("Key_Fold").index());
    tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1)));
    tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + ","
            + (result.attribute("Key_Scheme_options").index() + 1) + ","
            + (result.attribute("Key_Scheme_version_ID").index() + 1)));
    tester.setResultMatrix(new ResultMatrixPlainText());
    tester.setDisplayedResultsets(null);
    tester.setSignificanceLevel(0.05);
    tester.setShowStdDevs(true);
    // fill result matrix (but discarding the output)
    if (classification)
        tester.multiResultsetFull(0, result.attribute("Percent_correct").index());
    else
        tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index());
    // output results for reach dataset
    System.out.println("\nResult:");
    ResultMatrix matrix = tester.getResultMatrix();
    for (int i = 0; i < matrix.getColCount(); i++) {
        System.out.println(matrix.getColName(i));
        System.out.println("    Perc. correct: " + matrix.getMean(i, 0));
        System.out.println("    StdDev: " + matrix.getStdDev(i, 0));
    }
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Trains the given classifier on a training file, optionally serializes the
 * built model, evaluates it on the training data and via cross-validation
 * (or a test file), and returns a textual report plus a short accuracy
 * summary.
 * <p>
 * This is a cut-down variant of an options-driven evaluation routine: the
 * internal options array is fixed to {@code null}, so the option lookups
 * ({@code -c}, {@code -T}, {@code -l}, {@code -x}, {@code -s}, {@code -m},
 * {@code -i}, {@code -o}, {@code -v}, {@code -k}, {@code -r}, {@code -g},
 * {@code -z}, {@code -p}) are all made against a null array. NOTE(review):
 * presumably {@code Utils} then returns empty strings / false, which leaves
 * the defaults in place (class index = last attribute, 10 folds, seed 1, no
 * test file, no cost matrix) -- confirm against the Weka version in use.
 * The prediction, margin, graph and source output modes of the original
 * routine are disabled here.
 *
 * @param classifier machine learning classifier
 * @param trainFileName name of the file with the training data
 * @param objectOutputFileName name of the file the built classifier is
 *        serialized to (gzip-compressed if it ends in ".gz"); empty or
 *        {@code null} to skip saving
 * @throws Exception if model could not be evaluated successfully
 * @return a two-element array: element 0 is the full textual report,
 *         element 1 is a one-line summary holding the percentage correct on
 *         the training evaluation followed by that of the testing evaluation
 */
public static String[] evaluateModel(Classifier classifier, String trainFileName, String objectOutputFileName)
        throws Exception {

    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    String testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName,
            attributeRangeString;
    // printClassifications, printGraph, printSource and attributesToOutput are
    // still parsed below but no longer drive any output in this variant.
    boolean noOutput = false, printClassifications = false, trainStatistics = true, printMargins = false,
            printComplexityStatistics = false, printGraph = false, classStatistics = false,
            printSource = false;
    StringBuilder text = new StringBuilder();
    BufferedReader trainReader = null, testReader = null;
    ObjectInputStream objectInputStream = null;
    CostMatrix costMatrix = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;

    try {

        // No command-line options are accepted by this variant.
        String[] options = null;

        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.length() != 0) {
            classIndex = Integer.parseInt(classIndexString);
        }

        objectInputFileName = Utils.getOption('l', options);
        testFileName = Utils.getOption('T', options);
        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test " + "file given.");
            }
        } else if ((objectInputFileName.length() != 0)
                && ((!(classifier instanceof UpdateableClassifier)) || (testFileName.length() == 0))) {
            throw new Exception("Classifier not incremental, or no " + "test file provided: can't "
                    + "use both train and model file.");
        }
        try {
            if (trainFileName.length() != 0) {
                trainReader = new BufferedReader(new FileReader(trainFileName));
            }
            if (testFileName.length() != 0) {
                testReader = new BufferedReader(new FileReader(testFileName));
            }
            if (objectInputFileName.length() != 0) {
                InputStream is = new FileInputStream(objectInputFileName);
                if (objectInputFileName.endsWith(".gz")) {
                    is = new GZIPInputStream(is);
                }
                objectInputStream = new ObjectInputStream(is);
            }
        } catch (Exception e) {
            // keep the original failure as the cause
            throw new Exception("Can't open file " + e.getMessage() + '.', e);
        }
        if (testFileName.length() != 0) {
            // header only; instances are read one at a time later on
            template = test = new Instances(testReader, 1);
            // validate before setClassIndex so the intended message is reachable
            if (classIndex > test.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
            if (classIndex != -1) {
                test.setClassIndex(classIndex - 1);
            } else {
                test.setClassIndex(test.numAttributes() - 1);
            }
        }
        if (trainFileName.length() != 0) {
            if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)) {
                // header only; the reader stays open for incremental training
                train = new Instances(trainReader, 1);
            } else {
                // whole file is loaded here, so the reader is no longer needed
                try {
                    train = new Instances(trainReader);
                } finally {
                    trainReader.close();
                }
            }
            template = train;
            // validate before setClassIndex so the intended message is reachable
            if (classIndex > train.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            if ((testFileName.length() != 0) && !test.equalHeaders(train)) {
                throw new IllegalArgumentException("Train and test file not compatible!");
            }
        }
        if (template == null) {
            throw new Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());

        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.length() != 0);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.", e);
        }
        if (attributeRangeString.length() != 0) {
            printClassifications = true;
            if (!attributeRangeString.equals("0")) {
                attributesToOutput = new Range(attributeRangeString);
            }
        }

        // If a model file is given, we can't process
        // scheme-specific options
        if (objectInputFileName.length() != 0) {
            Utils.checkForRemainingOptions(options);
        } else {

            // Set options for classifier
            if (classifier instanceof OptionHandler) {
                ((OptionHandler) classifier).setOptions(options);
            }
        }
        Utils.checkForRemainingOptions(options);

    } catch (Exception e) {
        // keep the original failure as the cause
        throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier), e);
    }

    // Setup up evaluation objects
    EvaluationInternal trainingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);
    EvaluationInternal testingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);

    if (objectInputFileName.length() != 0) {

        // Load classifier from file
        try {
            classifier = (Classifier) objectInputStream.readObject();
        } finally {
            objectInputStream.close();
        }
    }

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0) && (costMatrix == null)
            && (trainFileName.length() != 0)) {

        // Build classifier incrementally
        trainingEvaluation.setPriors(train);
        testingEvaluation.setPriors(train);
        trainTimeStart = System.currentTimeMillis();
        try {
            if (objectInputFileName.length() == 0) {
                classifier.buildClassifier(train);
            }
            while (train.readInstance(trainReader)) {

                trainingEvaluation.updatePriors(train.instance(0));
                testingEvaluation.updatePriors(train.instance(0));
                ((UpdateableClassifier) classifier).updateClassifier(train.instance(0));
                train.delete(0);
            }
            trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        } finally {
            trainReader.close();
        }
    } else if (objectInputFileName.length() == 0) {

        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.setPriors(tempTrain);
        testingEvaluation.setPriors(tempTrain);
        trainTimeStart = System.currentTimeMillis();
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // Save the classifier if an object output file is provided
    if ((objectOutputFileName != null) && (objectOutputFileName.length() != 0)) {
        OutputStream os = new FileOutputStream(objectOutputFileName);
        try {
            if (objectOutputFileName.endsWith(".gz")) {
                os = new GZIPOutputStream(os);
            }
            ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
            objectOutputStream.writeObject(classifier);
            objectOutputStream.flush();
        } finally {
            // closes the outermost stream created so far, even on failure
            os.close();
        }
    }

    // Output model
    if (!(noOutput || printMargins)) {
        text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
        text.append("\n=== Evaluation Cost Matrix ===\n\n").append(costMatrix.toString());
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.length() != 0)) {

        if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)
                && (costMatrix == null)) {

            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            trainReader = new BufferedReader(new FileReader(trainFileName));
            try {
                // Incremental testing
                train = new Instances(trainReader, 1);
                if (classIndex != -1) {
                    train.setClassIndex(classIndex - 1);
                } else {
                    train.setClassIndex(train.numAttributes() - 1);
                }
                testTimeStart = System.currentTimeMillis();
                while (train.readInstance(trainReader)) {

                    trainingEvaluation.evaluateModelOnce(classifier, train.instance(0));
                    train.delete(0);
                }
                testTimeElapsed = System.currentTimeMillis() - testTimeStart;
            } finally {
                trainReader.close();
            }
        } else {
            testTimeStart = System.currentTimeMillis();
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
        }

        // Print the results of the training evaluation
        text.append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2)
                + " seconds");
        text.append("\nTime taken to test model on training data: "
                + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
        text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n",
                printComplexityStatistics));
        if (template.classAttribute().isNominal()) {
            if (classStatistics) {
                text.append("\n\n" + trainingEvaluation.toClassDetailsString());
            }
            text.append("\n\n" + trainingEvaluation.toMatrixString());
        }
    }

    // Compute proper error estimates
    if (testFileName.length() != 0) {

        // Testing is on the supplied test data
        try {
            while (test.readInstance(testReader)) {

                testingEvaluation.evaluateModelOnce(classifier, test.instance(0));
                test.delete(0);
            }
        } finally {
            testReader.close();
        }

        text.append("\n\n"
                + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
    } else if (trainFileName.length() != 0) {

        // Testing is via cross-validation on training data
        Random random = new Random(seed);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().isNumeric()) {
            text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
                    printComplexityStatistics));
        } else {
            text.append("\n\n\n" + testingEvaluation
                    .toSummaryString("=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
        }
    }
    if (template.classAttribute().isNominal()) {
        if (classStatistics) {
            text.append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.append("\n\n" + testingEvaluation.toMatrixString());
    }

    // One-line summary: training accuracy followed by testing accuracy
    String result = "\t" + Utils.doubleToString(trainingEvaluation.pctCorrect(), 12, 4) + " %";
    result += "       " + Utils.doubleToString(testingEvaluation.pctCorrect(), 12, 4) + " %";

    String[] returnString = { text.toString(), result };
    return returnString;
}