feature_construction.GeneticProgramming.java Source code

Java tutorial

Introduction

Here is the source code for feature_construction.GeneticProgramming.java

Source

package feature_construction;
/*
 * This file is part of JGAP.
 *
 * JGAP offers a dual license model containing the LGPL as well as the MPL.
 *
 * For licensing information please see the file license.txt included with JGAP
 * or have a look at the top of class org.jgap.Chromosome which representatively
 * includes the JGAP license policy applicable for any file delivered with JGAP.
 */

import java.io.*;
import java.util.*;
import java.util.Map.Entry;

import org.jgap.*;
import org.jgap.gp.*;
import org.jgap.gp.function.*;
import org.jgap.gp.impl.*;
import org.jgap.gp.terminal.*;
import org.jgap.util.*;

import weka.core.Instances;
import weka.core.converters.CSVLoader;
import examples.gp.symbolicRegression.*;

/*
 *
 * Symbolic Regression in JGAP.
 *
 * This program is based on the JGAP example MathProblem.java with
 * some generality.
 *
 * TODO:
 *  - option for ignoring specific variables
 *  - option for stopping:
 *     - running forever
 *     - after a specific time,
 *     - when a specific fitness value is reached
 *  - calculate the number of data rows automatically (i.e. skip num_row)
 *  - show similar solutions (with the same fitness as the best)
 *  - check if there are any more parameters in GPConfiguration to handle.
 *  - accept nominal values in the data section; then converted to
 *    numeric values.
 *  - add fitness metrics.
 *  - punish longer solutions
 *  - support for different "main" return classes, i.e. not just DoubleClass
 *  - correlation coefficient, and other statistical measures, e.g.
 *    R-squared, mean squared error, mean absolute error, minimum error,
 *    maximum error
 *  - more/better error checks
 *  - more building blocks, a la Eureqa http://ccsl.mae.cornell.edu/eureqa_ops
 *  - support for derivatives (a la Eureqa)? This may be hard...
 *  - integrate with Weka?
 *  - simplify the best solution with a CAS?
 *
 * This program was written by Hakan Kjellerstrand (hakank@bonetmail.com)
 * Also, see my JGAP page http://www.hakank.org/jgap/
 *
 * @since 3.5
 */
public class GeneticProgramming extends GPProblem {
    //  private transient static Logger LOGGER = Logger.getLogger(SymbolicRegression.class);

    /*
     * public variables which may be changed by configuration file
     *
     * Where readFile() handles a keyword for a field, that keyword is noted
     * next to the field.
     */

    // number of variables to use (output variable is excluded)
    // keyword: num_input_variables
    public static int numInputVariables;

    // the variables to use (of size numInputVariables); (re)created by create()
    public static Variable[] variables;

    // variable name
    // keyword: variable_names. create() indexes this array by data column,
    // i.e. it reads positions 0..numInputVariables (output column included).
    public static String[] variableNames;

    // index of the output variable
    // keyword: output_variable
    // NOTE(review): the "default last" is not applied anywhere in this part of
    // the file - presumably the caller assigns it; confirm.
    public static Integer outputVariable; // default last

    // keyword: ignore_variables. The values are parsed by readFile() but not
    // applied to the data yet.
    public static int[] ignoreVariables; // TODO

    // constants
    // each "constant" keyword line in the configuration file appends one value
    public static ArrayList<Double> constants = new ArrayList<Double>();

    // size of data
    // keyword: num_rows
    public static int numRows;

    // the data (as Double)
    // readFile() stores the transposed file contents here, so the first index
    // is the variable (column of the file) and the second index the data row.
    // Note: the last row is the output variable per default
    protected static Double[][] data;

    // If we have found a perfect solution.
    public static boolean foundPerfect = false;

    // standard GP parameters
    // keyword: min_init_depth
    public static int minInitDepth = 2;

    // keyword: max_init_depth
    public static int maxInitDepth = 4;

    // keyword: population_size
    public static int populationSize = 100;

    // keyword: max_crossover_depth
    public static int maxCrossoverDepth = 8;

    // keyword: program_creation_max_tries
    public static int programCreationMaxTries = 5;

    // keyword: num_evolutions
    public static int numEvolutions = 1800;

    // passed to GPGenotype.randomInitialGenotype() by create()
    public static boolean verboseOutput = true;

    // keyword: max_nodes; passed to GPGenotype.randomInitialGenotype() by create()
    public static int maxNodes = 21;

    // keyword: function_prob
    public static double functionProb = 0.9d;

    // keyword: reproduction_prob
    public static float reproductionProb = 0.1f; // float

    // keyword: mutation_prob
    public static float mutationProb = 0.1f; // float

    // keyword: crossover_prob
    public static double crossoverProb = 0.9d;

    // keyword: dynamize_arity_prob
    public static float dynamizeArityProb = 0.08f; // float

    // keyword: new_chroms_percent
    public static double newChromsPercent = 0.3d;

    // keyword: tournament_selector_size
    public static int tournamentSelectorSize = 0;

    // lower/upper ranges for the Terminal
    // Both are overridden by the terminal_range keyword (see readFile()).
    // The defaults must span a real interval: with upper == lower the range
    // would contain the single value -10 only.
    public static double lowerRange = -10.0d;

    public static double upperRange = 10.0d;

    // Should the terminal be a wholenumber or not?
    // keyword: terminal_wholenumbers
    public static boolean terminalWholeNumbers = true;

    // keyword: return_type
    public static String returnType = "DoubleClass"; // not used yet

    // keyword: presentation
    public static String presentation = "";

    // Using ADF
    // keyword: adf_arity. create() uses it as the number of ADF arguments.
    public static int adfArity = 0;

    // keyword: adf_type. create() recognises "boolean" and "integer"; any
    // other value is treated as double.
    public static String adfType = "double";

    // readFile() switches this on when adf_arity is greater than 0
    public static boolean useADF = false;

    // list of functions (as strings)
    // keyword: functions
    public static String[] functions = { "Multiply", "Divide", "Add", "Subtract" };

    // list of functions for ADF
    // keyword: adf_functions
    public static String[] adfFunctions = { "Multiply3", "Divide", "Add3", "Subtract" };

    // Should we punish length of solutions?
    // Note: Very simplistic version.
    // public static boolean punishLength = false;

    // if the values are too small we may want to scale
    // the error
    // keyword: scale_error
    public static double scaleError = -1.0d;

    // "bumping" is when we found a "perfect solution" and
    // want to see more "perfect solutions"
    // keyword: bump
    public static boolean bumpPerfect = false;

    // the limit for which we should show all (different) solutions
    // keyword: bump_value
    public static Double bumpValue = 0.0000;

    // checks for already shown solution when bumping
    private static HashMap<String, Integer> foundSolutions = new HashMap<String, Integer>();

    // timing
    public static long startTime;

    public static long endTime;

    // if > 0.0d -> stop if the fitness is below or equal
    // this value. TODO!
    // keyword: stop_criteria
    public static double stopCriteria = -1.0d;

    // keyword: show_population
    public static boolean showPopulation = false;

    // keyword: show_similiar (the keyword is spelled this way in readFile())
    public static boolean showSimiliar = false;

    /**
     * Creates the problem on top of the given GP configuration, which is
     * handed unchanged to the GPProblem superclass constructor.
     *
     * @param a_conf the GP configuration to use
     * @throws InvalidConfigurationException propagated from the superclass
     *         constructor
     */
    public GeneticProgramming(GPConfiguration a_conf) throws InvalidConfigurationException {
        super(a_conf);
    }

    /**
     * This method is used for setting up the commands and terminals that can be
     * used to solve the problem, and for creating the initial random population.
     * <p>
     * The main program always returns a double. When {@code useADF} is set, a
     * second sub program (the ADF) is added; its return type and the types of
     * its {@code adfArity} arguments follow {@code adfType} ("boolean",
     * "integer", anything else means double).
     * <p>
     * Side effect: the static {@code variables} array is rebuilt with one
     * {@code Variable} per input column (every column except the output one).
     *
     * @return GPGenotype the genotype holding the initial population
     * @throws InvalidConfigurationException propagated from the JGAP calls made
     *         here
     */
    public GPGenotype create() throws InvalidConfigurationException {
        GPConfiguration conf = getGPConfiguration();
        // At first, we define the return type of the GP program.
        // ------------------------------------------------------
        // Then, we define the arguments of the GP parts. Normally, only for ADF's
        // there is a specification here, otherwise it is empty as in first case.
        // -----------------------------------------------------------------------
        Class[] types;
        Class[][] argTypes;
        if (useADF) {
            Class adfClass;
            if ("boolean".equals(adfType)) {
                adfClass = CommandGene.BooleanClass;
            } else if ("integer".equals(adfType)) {
                adfClass = CommandGene.IntegerClass;
            } else {
                adfClass = CommandGene.DoubleClass;
            }
            types = new Class[] { CommandGene.DoubleClass, adfClass };
            // every ADF argument has the same type as the ADF itself
            Class[] adfs = new Class[adfArity];
            Arrays.fill(adfs, adfClass);
            argTypes = new Class[][] { {}, adfs };
        } else {
            types = new Class[] { CommandGene.DoubleClass };
            argTypes = new Class[][] { {} };
        }
        // Next, we define the set of available GP commands and terminals to use.
        // Please see package org.jgap.gp.function and org.jgap.gp.terminal
        // You can easily add commands and terminals of your own.
        // ----------------------------------------------------------------------
        CommandGene[] commands = makeCommands(conf, functions, lowerRange, upperRange, "plain");
        // Create the node sets: one entry per sub program, i.e. the main
        // program plus, when ADFs are enabled, the ADF. Allocating the second
        // entry only on demand keeps nodeSets in step with types/argTypes and
        // avoids writing past the end of the array in the ADF case.
        int command_len = commands.length;
        CommandGene[][] nodeSets = new CommandGene[useADF ? 2 : 1][];
        nodeSets[0] = new CommandGene[numInputVariables + command_len];
        // the variables:
        //  1) in the nodeSets matrix
        //  2) as variables (to be used for fitness checking)
        // --------------------------------------------------
        variables = new Variable[numInputVariables];
        // The output column defaults to the last one when it was not configured.
        int outputIndex = (outputVariable != null) ? outputVariable.intValue() : numInputVariables;
        int variableIndex = 0;
        for (int i = 0; i < numInputVariables + 1; i++) {
            if (i == outputIndex) {
                continue;
            }
            // Use the configured name when there is one for this column,
            // otherwise fall back to a generated name.
            String variableName = "V" + i;
            if (variableNames != null && i < variableNames.length) {
                variableName = variableNames[i];
            }
            variables[variableIndex] = Variable.create(conf, variableName, CommandGene.DoubleClass);
            nodeSets[0][variableIndex] = variables[variableIndex];
            System.out.println("input variable: " + variables[variableIndex]);
            variableIndex++;
        }
        // assign the functions/terminals
        // ------------------------------
        for (int i = 0; i < command_len; i++) {
            System.out.println("function1: " + commands[i]);
            nodeSets[0][i + numInputVariables] = commands[i];
        }
        // ADF functions in the second array in nodeSets
        if (useADF) {
            CommandGene[] adfCommands = makeCommands(conf, adfFunctions, lowerRange, upperRange, "ADF");
            int adfLength = adfCommands.length;
            nodeSets[1] = new CommandGene[adfLength];
            for (int i = 0; i < adfLength; i++) {
                System.out.println("function2: " + adfCommands[i]);
                nodeSets[1][i] = adfCommands[i];
            }
        }
        // Create genotype with initial population. Here, we use the
        // declarations made above:
        // ----------------------------------------------------------
        // Note: an experimental variant of this call additionally passed
        // per-sub-program minimum/maximum depths (1 and 9) and a "full mode
        // allowed" flag (true) for every sub program; it is not used.
        return GPGenotype.randomInitialGenotype(conf, types, argTypes, nodeSets, maxNodes, verboseOutput);
    }

    /**
     * Reads a CSV data file and returns its values as a transposed matrix,
     * i.e. the first index of the result is the column of the file and the
     * second index is the data row.
     * <p>
     * The first line of the file is treated as a header and skipped; blank
     * lines are skipped as well. Values are separated by commas and/or
     * whitespace. The labels "class-1", "class-2" and "class-3" are mapped to
     * 1, 2 and 3; every other value must be parseable as a double.
     * <p>
     * On an I/O error (including a missing file) the error is printed and the
     * JVM is terminated with exit code 1.
     *
     * @param file path of the CSV file to read
     * @return the transposed data matrix
     * @throws IllegalArgumentException if the file contains no data rows
     * @throws NumberFormatException if a value is neither a known class label
     *         nor a number
     */
    public static Double[][] readCSVFile(String file) {
        BufferedReader inr = null;
        try {
            inr = new BufferedReader(new FileReader(file));
            String str;
            int lineCount = 0;
            ArrayList<Double[]> theData = new ArrayList<Double[]>();
            //
            // read the lines
            //
            while ((str = inr.readLine()) != null) {
                // Count first, then test: testing before counting would leave
                // the counter at 0 forever and skip every line of the file.
                lineCount++;
                if (lineCount == 1) {
                    continue; // header line
                }
                str = str.trim();
                if (str.length() == 0) {
                    continue; // nothing to parse on a blank line
                }

                // Read the data rows
                // ------------------
                String[] dataRowStr = str.split("[\\s,]+");
                int len = dataRowStr.length;
                Double[] dataRow = new Double[len];
                for (int i = 0; i < len; i++) {
                    if (dataRowStr[i].equals("class-1")) {
                        dataRow[i] = Double.parseDouble("1");
                    } else if (dataRowStr[i].equals("class-2")) {
                        dataRow[i] = Double.parseDouble("2");
                    } else if (dataRowStr[i].equals("class-3")) {
                        dataRow[i] = Double.parseDouble("3");
                    } else {
                        System.out.println("dataRowStr[i] " + dataRowStr[i]);
                        dataRow[i] = Double.parseDouble(dataRowStr[i]);
                    }
                }
                theData.add(dataRow);
            } // end while
            if (theData.isEmpty()) {
                throw new IllegalArgumentException("No data rows found in CSV file: " + file);
            }
            // TODO: ignore the variables in ignoreVariables
            //
            // Since we calculate the error on the variable we
            // must transpose the data matrix. The column count is taken from
            // the first row.
            // -----------------------------------------------
            return transposeMatrix(theData.toArray(new Double[theData.size()][]));
        } catch (IOException e) {
            System.out.println(e);
            System.exit(1);
        } finally {
            // Release the file handle on every path, including parse errors.
            if (inr != null) {
                try {
                    inr.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing a reader fails
                }
            }
        }

        // Only reached if System.exit() above did not terminate the JVM.
        System.out.println("No Such CSV File.");
        return null;
    } // end readCSVFile

    /**
     * Reads a configuration file and stores its content in the static fields
     * of this class.
     * <p>
     * File layout:
     * <ul>
     * <li>empty lines and lines starting with # or % are ignored;</li>
     * <li>before the line {@code data}, every line containing a colon is a
     *     setting of the form {@code keyword: value(s)}; lines without a colon
     *     are ignored;</li>
     * <li>after the line {@code data}, every line is one data row of numbers
     *     separated by commas and/or whitespace.</li>
     * </ul>
     * The data rows are transposed before they are stored in {@code data}, so
     * that the first index is the variable and the second index the row.
     * <p>
     * An unknown keyword, or an I/O error (including a missing file), prints a
     * message and terminates the JVM with exit code 1.
     *
     * @param file path of the configuration file to read
     * @throws IllegalArgumentException if a keyword has no value or the file
     *         contains no data rows
     * @throws NumberFormatException if a numeric setting or a data value
     *         cannot be parsed
     */
    public static void readFile(String file) {
        BufferedReader inr = null;
        try {
            inr = new BufferedReader(new FileReader(file));
            String str;
            int lineCount = 0;
            boolean gotData = false;
            ArrayList<Double[]> theData = new ArrayList<Double[]>();
            //
            // read the lines
            //
            while ((str = inr.readLine()) != null) {
                lineCount++;
                str = str.trim();
                // ignore empty lines or comments, i.e. lines starting with either # or %
                // ----------------------------------------------------------------------
                if (str.startsWith("#") || str.startsWith("%") || str.length() == 0) {
                    continue;
                }
                if ("data".equals(str)) {
                    gotData = true;
                    continue;
                }
                if (gotData) {
                    // Read the data rows
                    // ------------------
                    String[] dataRowStr = str.split("[\\s,]+");
                    int len = dataRowStr.length;
                    Double[] dataRow = new Double[len];
                    for (int i = 0; i < len; i++) {
                        dataRow[i] = Double.parseDouble(dataRowStr[i]);
                    }
                    theData.add(dataRow);
                    continue;
                }
                // Check for parameters on the form
                //    parameter: value(s)
                // --------------------------------
                if (!str.contains(":")) {
                    continue;
                }
                // Split at the first colon only, so that a value may itself
                // contain a colon (e.g. in the presentation text).
                String[] row = str.split(":\\s*", 2);
                if (row.length < 2 || row[1].length() == 0) {
                    throw new IllegalArgumentException(
                            "Missing value for keyword: " + row[0] + " on line " + lineCount);
                }
                String key = row[0];
                String value = row[1];
                // Now check each parameter
                if ("return_type".equals(key)) {
                    returnType = value;
                } else if ("presentation".equals(key)) {
                    presentation = value;
                } else if ("num_input_variables".equals(key)) {
                    numInputVariables = Integer.parseInt(value);
                } else if ("num_rows".equals(key)) {
                    numRows = Integer.parseInt(value);
                } else if ("terminal_range".equals(key)) {
                    String[] ranges = value.split("\\s+");
                    lowerRange = Double.parseDouble(ranges[0]);
                    upperRange = Double.parseDouble(ranges[1]);
                } else if ("terminal_wholenumbers".equals(key)) {
                    terminalWholeNumbers = Boolean.parseBoolean(value);
                } else if ("max_init_depth".equals(key)) {
                    maxInitDepth = Integer.parseInt(value);
                } else if ("min_init_depth".equals(key)) {
                    minInitDepth = Integer.parseInt(value);
                } else if ("program_creation_max_tries".equals(key)) {
                    programCreationMaxTries = Integer.parseInt(value);
                } else if ("population_size".equals(key)) {
                    populationSize = Integer.parseInt(value);
                } else if ("max_crossover_depth".equals(key)) {
                    maxCrossoverDepth = Integer.parseInt(value);
                } else if ("function_prob".equals(key)) {
                    functionProb = Double.parseDouble(value);
                } else if ("reproduction_prob".equals(key)) {
                    reproductionProb = Float.parseFloat(value);
                } else if ("mutation_prob".equals(key)) {
                    mutationProb = Float.parseFloat(value);
                } else if ("crossover_prob".equals(key)) {
                    crossoverProb = Double.parseDouble(value);
                } else if ("dynamize_arity_prob".equals(key)) {
                    dynamizeArityProb = Float.parseFloat(value);
                } else if ("new_chroms_percent".equals(key)) {
                    newChromsPercent = Double.parseDouble(value);
                } else if ("num_evolutions".equals(key)) {
                    numEvolutions = Integer.parseInt(value);
                } else if ("max_nodes".equals(key)) {
                    maxNodes = Integer.parseInt(value);
                } else if ("bump".equals(key)) {
                    bumpPerfect = Boolean.parseBoolean(value);
                } else if ("bump_value".equals(key)) {
                    bumpValue = Double.parseDouble(value);
                } else if ("functions".equals(key)) {
                    functions = value.split("[\\s,]+");
                } else if ("adf_functions".equals(key)) {
                    adfFunctions = value.split("[\\s,]+");
                } else if ("variable_names".equals(key)) {
                    variableNames = value.split("[\\s,]+");
                } else if ("output_variable".equals(key)) {
                    outputVariable = Integer.parseInt(value);
                } else if ("ignore_variables".equals(key)) {
                    String[] ignoreVariablesS = value.split("[\\s,]+");
                    ignoreVariables = new int[ignoreVariablesS.length];
                    // TODO: make it a HashMap instead!
                    for (int i = 0; i < ignoreVariablesS.length; i++) {
                        ignoreVariables[i] = Integer.parseInt(ignoreVariablesS[i]);
                    }
                } else if ("constant".equals(key)) {
                    // may occur several times; every occurrence adds a constant
                    Double constant = Double.parseDouble(value);
                    constants.add(constant);
                } else if ("adf_arity".equals(key)) {
                    adfArity = Integer.parseInt(value);
                    System.out.println("ADF arity " + adfArity);
                    // a positive arity is what switches ADF support on
                    if (adfArity > 0) {
                        useADF = true;
                    }
                } else if ("adf_type".equals(key)) {
                    adfType = value;
                    // } else if ("punish_length".equals(key)) {
                    //    punishLength = Boolean.parseBoolean(value);

                } else if ("tournament_selector_size".equals(key)) {
                    tournamentSelectorSize = Integer.parseInt(value);
                } else if ("scale_error".equals(key)) {
                    scaleError = Double.parseDouble(value);
                } else if ("stop_criteria".equals(key)) {
                    stopCriteria = Double.parseDouble(value);
                } else if ("show_population".equals(key)) {
                    showPopulation = Boolean.parseBoolean(value);
                } else if ("show_similiar".equals(key)) {
                    showSimiliar = Boolean.parseBoolean(value);
                } else {
                    System.out.println("Unknown keyword: " + key + " on line " + lineCount);
                    System.exit(1);
                }
            } // end while
            //
            // Now we know everything to be known.
            // Construct the matrix from the file.
            // -----------------------------------
            if (theData.isEmpty()) {
                throw new IllegalArgumentException("No data rows found in file: " + file);
            }
            // TODO: ignore the variables in ignoreVariables
            //
            // Since we calculate the error on the variable we
            // must transpose the data matrix. The column count is taken from
            // the first data row.
            // -----------------------------------------------
            data = transposeMatrix(theData.toArray(new Double[theData.size()][]));
        } catch (IOException e) {
            System.out.println(e);
            System.exit(1);
        } finally {
            // Release the file handle on every path, including parse errors.
            if (inr != null) {
                try {
                    inr.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing a reader fails
                }
            }
        }
    } // end readFile

    /**
     * Returns the transpose of a matrix: element {@code [i][j]} of the input
     * becomes element {@code [j][i]} of the result. The input is not modified.
     * <p>
     * The number of columns is taken from the first row, so every row must
     * have at least that many elements; extra elements in longer rows are
     * ignored.
     *
     * @param m the matrix to transpose (rows x columns)
     * @return a new columns x rows matrix, or an empty matrix if {@code m}
     *         has no rows
     */
    public static Double[][] transposeMatrix(Double[][] m) {
        int r = m.length;
        if (r == 0) {
            // no first row to take the column count from
            return new Double[0][0];
        }
        int c = m[0].length;
        Double[][] t = new Double[c][r];
        for (int i = 0; i < r; ++i) {
            for (int j = 0; j < c; ++j) {
                t[j][i] = m[i][j];
            }
        }
        return t;
    } // end transposeMatrix

    /*
     *  makeCommands:
     *  makes the CommandGene array given the function listed in the
     *  configurations file
     *  ------------------------------------------------------------
     */
    static CommandGene[] makeCommands(GPConfiguration conf, String[] functions, Double lowerRange,
            Double upperRange, String type) {
        ArrayList<CommandGene> commandsList = new ArrayList<CommandGene>();
        int len = functions.length;
        try {
            for (int i = 0; i < len; i++) {
                //
                // Note: Not all functions are applicable here...
                //
                if ("Multiply".equals(functions[i])) {
                    commandsList.add(new Multiply(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Multiply(conf, CommandGene.BooleanClass));
                    }
                } else if ("Multiply3".equals(functions[i])) {
                    commandsList.add(new Multiply3(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Multiply3(conf, CommandGene.BooleanClass));
                    }
                } else if ("Add".equals(functions[i])) {
                    commandsList.add(new Add(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Add(conf, CommandGene.BooleanClass));
                    }
                } else if ("Divide".equals(functions[i])) {
                    commandsList.add(new Divide(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Divide(conf, CommandGene.BooleanClass));
                    }
                } else if ("Add3".equals(functions[i])) {
                    commandsList.add(new Add3(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Add3(conf, CommandGene.BooleanClass));
                    }
                } else if ("Add4".equals(functions[i])) {
                    commandsList.add(new Add4(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Add4(conf, CommandGene.BooleanClass));
                    }
                } else if ("Subtract".equals(functions[i])) {
                    commandsList.add(new Subtract(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Subtract(conf, CommandGene.BooleanClass));
                    }
                } else if ("Sine".equals(functions[i])) {
                    commandsList.add(new Sine(conf, CommandGene.DoubleClass));
                } else if ("ArcSine".equals(functions[i])) {
                    commandsList.add(new ArcSine(conf, CommandGene.DoubleClass));
                } else if ("Tangent".equals(functions[i])) {
                    commandsList.add(new Tangent(conf, CommandGene.DoubleClass));
                } else if ("ArcTangent".equals(functions[i])) {
                    commandsList.add(new ArcTangent(conf, CommandGene.DoubleClass));
                } else if ("Cosine".equals(functions[i])) {
                    commandsList.add(new Cosine(conf, CommandGene.DoubleClass));
                } else if ("ArcCosine".equals(functions[i])) {
                    commandsList.add(new ArcCosine(conf, CommandGene.DoubleClass));
                } else if ("Exp".equals(functions[i])) {
                    commandsList.add(new Exp(conf, CommandGene.DoubleClass));
                } else if ("Log".equals(functions[i])) {
                    commandsList.add(new Log(conf, CommandGene.DoubleClass));
                } else if ("Abs".equals(functions[i])) {
                    commandsList.add(new Abs(conf, CommandGene.DoubleClass));
                } else if ("Pow".equals(functions[i])) {
                    commandsList.add(new Pow(conf, CommandGene.DoubleClass));
                } else if ("Round".equals(functions[i])) {
                    commandsList.add(new Round(conf, CommandGene.DoubleClass));
                } else if ("Ceil".equals(functions[i])) {
                    commandsList.add(new Ceil(conf, CommandGene.DoubleClass));
                } else if ("Floor".equals(functions[i])) {
                    commandsList.add(new Floor(conf, CommandGene.DoubleClass));
                } else if ("Modulo".equals(functions[i])) {
                    commandsList.add(new Modulo(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Modulo(conf, CommandGene.BooleanClass));
                    }
                } else if ("ModuloD".equals(functions[i])) {
                    commandsList.add(new ModuloD(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new ModuloD(conf, CommandGene.BooleanClass));
                    }
                } else if ("Max".equals(functions[i])) {
                    commandsList.add(new Max(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Max(conf, CommandGene.BooleanClass));
                    }
                } else if ("Min".equals(functions[i])) {
                    commandsList.add(new Min(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Min(conf, CommandGene.BooleanClass));
                    }
                } else if ("Sqrt".equals(functions[i])) {
                    // Note: This uses my Sqrt.java file
                    commandsList.add(new Sqrt(conf, CommandGene.DoubleClass));
                } else if ("Logistic".equals(functions[i])) {
                    // Note: This uses my Logistic.java file
                    commandsList.add(new Logistic(conf, CommandGene.DoubleClass));
                } else if ("Gaussian".equals(functions[i])) {
                    // Note: This uses my Gaussian.java file
                    commandsList.add(new Gaussian(conf, CommandGene.DoubleClass));
                } else if ("Sigmoid".equals(functions[i])) {
                    // Note: This uses my Sigmoid.java file
                    commandsList.add(new Sigmoid(conf, CommandGene.DoubleClass));
                } else if ("Gamma".equals(functions[i])) {
                    // Note: This uses my Gamma.java file
                    commandsList.add(new Gamma(conf, CommandGene.DoubleClass));
                } else if ("Step".equals(functions[i])) {
                    // Note: This uses my Step.java file
                    commandsList.add(new Step(conf, CommandGene.DoubleClass));
                } else if ("Sign".equals(functions[i])) {
                    // Note: This uses my Sign.java file
                    commandsList.add(new Sign(conf, CommandGene.DoubleClass));
                } else if ("Hill".equals(functions[i])) {
                    // Note: This uses my Hill.java file
                    commandsList.add(new Hill(conf, CommandGene.DoubleClass));
                } else if ("LesserThan".equals(functions[i])) {
                    // experimental
                    commandsList.add(new LesserThan(conf, CommandGene.BooleanClass));
                } else if ("GreaterThan".equals(functions[i])) {
                    // experimental
                    commandsList.add(new GreaterThan(conf, CommandGene.BooleanClass));
                } else if ("If".equals(functions[i])) {
                    // Note: This is just If on DoubleClass, not a proper Boolean
                    commandsList.add(new If(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new If(conf, CommandGene.BooleanClass));
                    }
                } else if ("IfElse".equals(functions[i])) {
                    commandsList.add(new IfElse(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new IfElse(conf, CommandGene.BooleanClass));
                    }
                } else if ("IfDyn".equals(functions[i])) {
                    // Well, this don't work as expected...
                    // System.out.println("IfDyn is not supported yet");
                    commandsList.add(new IfDyn(conf, CommandGene.BooleanClass, 1, 1, 5));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new IfDyn(conf, CommandGene.DoubleClass, 1, 1, 5));
                    }
                } else if ("Loop".equals(functions[i])) { // experimental
                    commandsList.add(new Loop(conf, CommandGene.DoubleClass, 3));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Loop(conf, CommandGene.BooleanClass, 3));
                    }
                } else if ("Equals".equals(functions[i])) {
                    // experimental
                    // commandsList.add(new Equals(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Equals(conf, CommandGene.BooleanClass));
                    }
                } else if ("ForXLoop".equals(functions[i])) {
                    // experimental
                    commandsList.add(new ForXLoop(conf, CommandGene.IntegerClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new ForXLoop(conf, CommandGene.BooleanClass));
                    } else if (useADF && "integer".equals(adfType)) {
                        commandsList.add(new ForXLoop(conf, CommandGene.IntegerClass));
                    }
                } else if ("ForLoop".equals(functions[i])) {
                    // experimental
                    commandsList.add(new ForLoop(conf, CommandGene.IntegerClass, 10));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new ForLoop(conf, CommandGene.BooleanClass, 10));
                    } else if (useADF && "integer".equals(adfType)) {
                        commandsList.add(new ForLoop(conf, CommandGene.IntegerClass, 10));
                    }
                } else if ("Increment".equals(functions[i])) {
                    commandsList.add(new Increment(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Increment(conf, CommandGene.BooleanClass));
                    }
                } else if ("Argument".equals(functions[i])) {
                    // experimental
                    /*
                    commandsList.add(new Argument(conf, 1, CommandGene.DoubleClass));
                           if ("boolean".equals(adfType)) {
                    commandsList.add(new Argument(conf, 1, CommandGene.BooleanClass));
                           }
                     */

                } else if ("StoreTerminal".equals(functions[i])) {
                    // experimental
                    commandsList.add(new StoreTerminal(conf, "dmem0", CommandGene.DoubleClass));
                    commandsList.add(new StoreTerminal(conf, "dmem1", CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new StoreTerminal(conf, "bmem0", CommandGene.DoubleClass));
                        commandsList.add(new StoreTerminal(conf, "bmem1", CommandGene.DoubleClass));
                    }
                } else if ("Pop".equals(functions[i])) {
                    // experimental
                    // commandsList.add(new Pop(conf, CommandGene.DoubleClass));
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Pop(conf, CommandGene.BooleanClass));
                    }
                } else if ("Push".equals(functions[i])) {
                    // experimental
                    commandsList.add(new Push(conf, CommandGene.DoubleClass));
                } else if ("And".equals(functions[i])) {
                    // experimental
                    commandsList.add(new And(conf));
                } else if ("Or".equals(functions[i])) {
                    // experimental
                    commandsList.add(new Or(conf));
                } else if ("Xor".equals(functions[i])) {
                    // experimental
                    commandsList.add(new Xor(conf));
                } else if ("Not".equals(functions[i])) {
                    // experimental
                    commandsList.add(new Not(conf));
                } else if ("AndD".equals(functions[i])) {
                    // experimental
                    commandsList.add(new AndD(conf));
                } else if ("OrD".equals(functions[i])) {
                    // experimental
                    commandsList.add(new OrD(conf));
                } else if ("XorD".equals(functions[i])) {
                    // experimental
                    commandsList.add(new XorD(conf));
                } else if ("NotD".equals(functions[i])) {
                    // experimental
                    commandsList.add(new NotD(conf));
                } else if ("SubProgram".equals(functions[i])) {
                    // experimental
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new SubProgram(conf,
                                new Class[] { CommandGene.BooleanClass, CommandGene.BooleanClass }));
                        commandsList.add(new SubProgram(conf, new Class[] { CommandGene.BooleanClass,
                                CommandGene.BooleanClass, CommandGene.BooleanClass }));
                    }
                    commandsList.add(
                            new SubProgram(conf, new Class[] { CommandGene.DoubleClass, CommandGene.DoubleClass }));
                    commandsList.add(new SubProgram(conf, new Class[] { CommandGene.DoubleClass,
                            CommandGene.DoubleClass, CommandGene.DoubleClass }));
                } else if ("Tupel".equals(functions[i])) {
                    // experimental
                    if (useADF && "boolean".equals(adfType)) {
                        commandsList.add(new Tupel(conf,
                                new Class[] { CommandGene.BooleanClass, CommandGene.BooleanClass }));
                    }
                } else {
                    System.out.println("Unkown function: " + functions[i]);
                    System.exit(1);
                }
            }
            commandsList
                    .add(new Terminal(conf, CommandGene.DoubleClass, lowerRange, upperRange, terminalWholeNumbers));
            // commandsList.add(new Terminal(conf, CommandGene.BooleanClass, lowerRange, upperRange, terminalWholeNumbers));

            // ADF
            // Just add the ADF to the "normal" command list (i.e. not to the ADF list)
            if (useADF && !"ADF".equals(type)) {
                commandsList.add(new ADF(conf, 1, adfArity));
            }
            if (constants != null) {
                for (int i = 0; i < constants.size(); i++) {
                    Double constant = constants.get(i);
                    commandsList.add(new Constant(conf, CommandGene.DoubleClass, constant));
                }
            }
        } catch (Exception e) {
            System.out.println(e);
        }
        CommandGene[] commands = new CommandGene[commandsList.size()];
        commandsList.toArray(commands);
        return commands;
    }

    /**
     * Loads a CSV file with Weka's {@code CSVLoader} and copies every attribute
     * value of every instance into a {@code double} matrix.
     *
     * The last attribute is marked as the class attribute on the loaded data set,
     * but it is still copied into the matrix like every other column.
     *
     * @param fileName path of the CSV file to read
     * @return a matrix with one row per instance and one column per attribute, or
     *         {@code null} if loading or conversion failed (the stack trace is
     *         printed in that case)
     */
    public static double[][] convertInstancesToInputFeaturesArray(String fileName) {
        try {
            // Read the CSV into a Weka data set.
            CSVLoader csvReader = new CSVLoader();
            csvReader.setSource(new File(fileName));

            Instances loaded = csvReader.getDataSet();
            loaded.setClassIndex(loaded.numAttributes() - 1);

            int rowCount = loaded.numInstances();
            int columnCount = loaded.numAttributes();
            double[][] matrix = new double[rowCount][columnCount];

            // Flatten the instances into the matrix, one instance per row.
            for (int row = 0; row < rowCount; row++) {
                for (int col = 0; col < columnCount; col++) {
                    matrix[row][col] = loaded.get(row).value(col);
                }
            }
            return matrix;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Executes an evolved program on every row of a CSV file and writes the
     * resulting value, together with the row's class label, to an output file.
     *
     * For each input row, every column except {@code outputVariable} is copied in
     * order into the shared {@code variables}; the program is then run with
     * {@code execute_double(0, ...)} and one line {@code <result>,  class-<label>}
     * is written. A header line precedes the first data line.
     *
     * Errors are reported on the console and not propagated. The output writer is
     * closed on every path so the file handle is not leaked, and the output file
     * is not created (or truncated) when the input could not be loaded.
     *
     * @param ind            the program to execute for each row
     * @param inputFileName  CSV file holding the input rows
     * @param outputFileName file that receives the constructed feature and class label
     */
    public static void constructFeatures(IGPProgram ind, String inputFileName, String outputFileName) {
        PrintWriter featureWriter = null;
        try {
            double[][] inputSet = convertInstancesToInputFeaturesArray(inputFileName);
            if (inputSet == null) {
                // The loader has already printed the cause; do not clobber the output file.
                System.err.println("Could not load input features from " + inputFileName
                        + "; no features were written.");
                return;
            }

            Object[] noargs = new Object[0];
            featureWriter = new PrintWriter(outputFileName);

            for (int j = 0; j < inputSet.length; j++) {
                double[] row = inputSet[j];

                // Set all the input variables, skipping the output (class) column.
                int variableIndex = 0;
                for (int i = 0; i < row.length; i++) {
                    if (i != outputVariable) {
                        variables[variableIndex].set(row[i]);
                        variableIndex++;
                    }
                }

                try {
                    double result = ind.execute_double(0, noargs);
                    int classIndexValue = (int) (row[outputVariable]);

                    // Header goes out once, right before the first data line.
                    if (j == 0) {
                        featureWriter.print("feature-1 , class \n");
                    }
                    featureWriter.write(result + ",  class-" + classIndexValue + "\n");
                } catch (ArithmeticException ex) {
                    // This should not happen, some illegal operation was executed.
                    System.out.println(ind);
                    throw ex;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // close() also flushes, so rows written before a failure are kept.
            if (featureWriter != null) {
                featureWriter.close();
            }
        }
    }

    /**
     * Configures and runs the evolution, printing progress to standard output.
     *
     * With an argument, the problem is read from that configuration file via
     * {@code readFile}; without one, a built-in Fibonacci problem with three input
     * variables is used. The GP configuration is then filled from the class's
     * static settings, and the population is evolved one generation at a time for
     * {@code numEvolutions} generations. A program is reported each time it has a
     * strictly lower fitness than the best seen so far.
     *
     * @param args optional; {@code args[0]} is the path of a configuration file
     * @return the program with the lowest fitness seen over all generations, or
     *         {@code null} if no generation was evolved
     * @throws Exception if reading the configuration, building the GP setup or
     *         evolving fails
     * @author Hakan Kjellerstrand
     */
    public static IGPProgram main(String[] args) throws Exception {
        //
        // Read a configuration file, or not...
        //
        if (args.length > 0) {
            String filename = args[0]; // e.g. "fahrenheit_celsius.conf"
            readFile(filename);
        } else {
            // Default problem
            // Fibonacci series, with three input variables to make it
            // somewhat harder.
            // -------------------------------------------------------
            numRows = 21;
            numInputVariables = 3;
            // Note: The last array is the output array
            int[][] indata = {
                    { 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765,
                            10946 },
                    { 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946,
                            17711 },
                    { 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711,
                            28657 },
                    { 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711,
                            28657, 46368 } };
            data = new Double[numInputVariables + 1][numRows];
            for (int i = 0; i < numInputVariables + 1; i++) {
                for (int j = 0; j < numRows; j++) {
                    data[i][j] = Double.valueOf(indata[i][j]);
                }
            }
            functions = "Multiply,Divide,Add,Subtract".split(",");
            variableNames = "F1,F2,F3,F4".split(",");
            presentation = "Fibonacci series";
        }

        // Present the problem
        // -------------------
        System.out.println("Presentation: " + presentation);
        if (outputVariable == null) {
            // Default: the last column is the output.
            outputVariable = numInputVariables;
        }
        if (variableNames == null) {
            variableNames = new String[numInputVariables + 1];
            for (int i = 0; i < numInputVariables + 1; i++) {
                variableNames[i] = "V" + i;
            }
        }
        System.out
                .println("output_variable: " + variableNames[outputVariable] + " (index: " + outputVariable + ")");

        // Setup the algorithm's parameters.
        // ---------------------------------
        GPConfiguration config = new GPConfiguration();
        // We use a delta fitness evaluator because we compute a defect rate, not
        // a point score!
        // ----------------------------------------------------------------------
        config.setGPFitnessEvaluator(new DeltaGPFitnessEvaluator());
        config.setMaxInitDepth(maxInitDepth);
        config.setPopulationSize(populationSize);
        // Default selectionMethod is TournamentSelector(3)
        if (tournamentSelectorSize > 0) {
            config.setSelectionMethod(new TournamentSelector(tournamentSelectorSize));
        }
        // The maximum depth of an individual resulting from crossover.
        config.setMaxCrossoverDepth(maxCrossoverDepth);
        config.setFitnessFunction(new GeneticProgramming.FormulaFitnessFunction());
        // Strict creation (true) throws an error during evolution when no function
        // or terminal of a required type is declared in the GPConfiguration;
        // non-strict (false) tries a completely different combination of functions
        // and terminals instead.
        config.setStrictProgramCreation(false);
        // In crossover: if a random number (0..1) < this value, then choose a
        // function, otherwise a terminal.
        config.setFunctionProb(functionProb);
        // The probability that a reproduction operation is chosen during evolution.
        // Must be between 0.0d and 1.0d. crossoverProb + reproductionProb must
        // equal 1.0d.
        config.setReproductionProb(reproductionProb);
        // The probability that a node is mutated during growing a program.
        config.setMutationProb(mutationProb);
        // The probability that the arity of a node is changed during growing a
        // program.
        config.setDynamizeArityProb(dynamizeArityProb);
        // Percentage of the population that will be filled with new individuals
        // during evolution. Must be between 0.0d and 1.0d.
        config.setNewChromsPercent(newChromsPercent);
        // The minimum depth of an individual when the world is created.
        config.setMinInitDepth(minInitDepth);
        // If strict program creation is off: maximum number of tries to construct
        // a valid program.
        config.setProgramCreationMaxTries(programCreationMaxTries);

        GPProblem problem = new GeneticProgramming(config);
        // Create the genotype of the problem, i.e., define the GP commands and
        // terminals that can be used, and constrain the structure of the GP
        // program.
        // --------------------------------------------------------------------
        GPGenotype gp = problem.create();
        gp.setVerboseOutput(false);
        startTime = System.currentTimeMillis();

        //
        // The generations are stepped manually (instead of gp.evolve(numEvolutions))
        // to be able to control the output better.
        //
        System.out.println("Creating initial population");
        // Report free memory, matching the label and myOutputSolution.
        System.out.println("Mem free: " + SystemKit.niceMemory(SystemKit.getFreeMemoryMB()) + " MB");
        IGPProgram fittest = null;
        double bestFit = -1.0d; // negative means "no best solution recorded yet"
        int bestGen = 0;
        HashMap<String, Integer> similiar = null;
        if (showSimiliar) {
            similiar = new HashMap<String, Integer>();
        }
        for (int gen = 1; gen <= numEvolutions; gen++) {
            gp.evolve(); // evolve one generation
            gp.calcFitness();
            GPPopulation pop = gp.getGPPopulation();
            IGPProgram thisFittest = pop.determineFittestProgram();
            // TODO: Here I would like to have the correlation coefficient etc
            thisFittest.setApplicationData((Object) ("gen" + gen));
            double fitness = thisFittest.getFitnessValue();
            if (showSimiliar || showPopulation) {
                if (showPopulation) {
                    System.out.println("Generation " + gen + " (show whole population, sorted)");
                }
                pop.sortByFitness();
                for (IGPProgram p : pop.getGPPrograms()) {
                    double fit = p.getFitnessValue();
                    // Count programs that are at least as good as the previous best.
                    if (showSimiliar && fit <= bestFit) {
                        String prog = p.toStringNorm(0);
                        Integer seen = similiar.get(prog);
                        similiar.put(prog, seen == null ? 1 : seen + 1);
                    }
                    if (showPopulation) {
                        String prg = p.toStringNorm(0);
                        System.out.println("\tprogram: " + prg + " fitness: " + fit);
                    }
                }
            }
            //
            // Right now a program is printed if it has a better fitness value
            // than the former best solution.
            //
            if (bestFit < 0.0d || fitness < bestFit) {
                bestGen = gen;
                myOutputSolution(thisFittest, gen);
                bestFit = fitness;
                fittest = thisFittest;
                if (showSimiliar) {
                    // A new best invalidates the programs collected so far.
                    similiar.clear();
                }
            }
        }

        // Print the best solution so far to the console.
        // ----------------------------------------------
        System.out.println("\nAll time best (from generation " + bestGen + ")");
        myOutputSolution(fittest, numEvolutions);
        // fittest is still null when no generation was evolved (numEvolutions < 1).
        if (fittest != null) {
            System.out.println("applicationData: " + fittest.getApplicationData());
        }

        endTime = System.currentTimeMillis();
        long elapsedTime = endTime - startTime;
        System.out.println("\nTotal time " + elapsedTime + "ms");
        if (showSimiliar) {
            System.out.println("\nAll solutions with the best fitness (" + bestFit + "):");
            // TODO: These should be sorted by values.
            for (Map.Entry<String, Integer> entry : similiar.entrySet()) {
                System.out.println(entry.getKey() + " (" + entry.getValue() + ")");
            }
        }

        return fittest;
    }

    /**
     * Tells whether a value lies in a closed interval.
     *
     * @param low  lower bound, inclusive
     * @param high upper bound, inclusive
     * @param n    value to test
     * @return {@code true} if {@code low <= n <= high}; {@code false} otherwise,
     *         including when any argument is NaN
     */
    public static boolean range(double low, double high, double n) {
        final boolean notBelowLow = low <= n;
        final boolean notAboveHigh = high >= n;
        return notBelowLow && notAboveHigh;
    }

    /**
     * Increments the counter stored under {@code classRange}, starting it at 1
     * when the key is absent or mapped to {@code null}.
     *
     * @param entropy    map from class range to number of occurrences; modified in place
     * @param classRange key whose counter is incremented
     */
    public static void addToEntropy(HashMap<Integer, Integer> entropy, int classRange) {
        Integer previous = entropy.get(classRange);
        int updated = (previous == null) ? 1 : previous + 1;
        entropy.put(classRange, updated);
    }

    /**
     * Fitness function for evaluating the produced formulas, represented as GP
     * programs. Lower values are better.
     *
     * For every data row the program is executed on the input variables and its
     * result is sorted into one of three interval bins; the bin's
     * {@code EntropyClass} gets a frequency hit and the {@code EntropyClass} of the
     * row's actual class label gets a total-count hit. The fitness is the absolute
     * value of the sum of {@code calculateEntropy} over the three classes,
     * optionally bumped and scaled. A result outside {@code [-10000, 10000]} or a
     * NaN aborts the evaluation with the fixed penalty 10000.
     */
    public static class FormulaFitnessFunction extends GPFitnessFunction {
        protected double evaluate(final IGPProgram a_subject) {
            return computeRawFitness(a_subject);
        }

        /**
         * Computes the fitness of one program over all {@code numRows} data rows.
         *
         * @param ind the program to evaluate
         * @return the fitness value (lower is better); 10000 if any result is NaN
         *         or outside {@code [-10000, 10000]}
         * @throws ArithmeticException if executing the program raises one; the
         *         program is printed before the exception is rethrown
         */
        public double computeRawFitness(final IGPProgram ind) {
            // Results beyond +/- max (or NaN) disqualify the program immediately.
            final double max = 10000;
            Object[] noargs = new Object[0];

            // One accumulator per class.
            // NOTE(review): the class count is fixed at 3, so labels in the output
            // column are presumably 1..3 - confirm against the data files.
            EntropyClass[] classes = new EntropyClass[3];
            for (int i = 0; i < classes.length; i++) {
                classes[i] = new EntropyClass();
            }

            // Evaluate the program for every data row
            // --------------------------------------------
            for (int j = 0; j < numRows; j++) {
                // Set all the input variables, skipping the output column.
                int variableIndex = 0;
                for (int i = 0; i < numInputVariables + 1; i++) {
                    if (i != outputVariable) {
                        variables[variableIndex].set(data[i][j]);
                        variableIndex++;
                    }
                }

                try {
                    double result = ind.execute_double(0, noargs);
                    // Class labels are 1-based in the data; array indices are 0-based.
                    int actualClassIndex = (data[outputVariable][j]).intValue() - 1;

                    // Bound intervals
                    if (result > max || result < -max || Double.isNaN(result)) {
                        return max;
                    }

                    // Bin the result: [-max/2, 0] -> 0, (0, max/2] -> 1, rest -> 2.
                    if (range(-max / 2, 0, result)) {
                        classes[0].addToFrequency();
                    } else if (range(0, max / 2, result)) {
                        classes[1].addToFrequency();
                    } else {
                        classes[2].addToFrequency();
                    }

                    // Total frequency
                    classes[actualClassIndex].addToNumberOfTotalClasses();
                } catch (ArithmeticException ex) {
                    // This should not happen, some illegal operation was executed.
                    // ------------------------------------------------------------
                    System.out.println(ind);
                    throw ex;
                }
            }

            // Sum the per-class entropy terms (the original comment calls this
            // Shannon's entropy; the formula lives in EntropyClass).
            double error = 0.0;
            for (int i = 0; i < classes.length; i++) {
                error += classes[i].calculateEntropy(i);
            }
            error = Math.abs(error);

            // If the fitness is very close to 0.0 then we maybe bump it
            // up to see alternative solutions.
            // -------------------------------------------------------
            if (error <= bumpValue && bumpPerfect) {
                if (!foundPerfect) {
                    System.out.println("Found a perfect solution with err " + error + "!. Bump up the values!");
                    foundPerfect = true;
                }
                ProgramChromosome chrom = ind.getChromosome(0);
                String program = chrom.toStringNorm(0);
                if (!foundSolutions.containsKey(program)) {
                    System.out.println("PROGRAM:" + program + " error: " + error);
                    foundSolutions.put(program, 1);
                } else {
                    // TODO: We may want to show the number of hits
                    // after the run...
                    foundSolutions.put(program, foundSolutions.get(program) + 1);
                }
                error = 0.1d;
            }

            if (scaleError > 0.0d) {
                return error * scaleError;
            } else {
                return error;
            }
        }
    }

    /**
     * Prints a progress line followed by the fitness, the normalized program text
     * and the chromosome depth(s) of the given program to standard output.
     *
     * Adapted from GPGenotype.outputSolution, which used log4j. Only the progress
     * line and a short notice are printed when the program is {@code null} or its
     * fitness is infinite.
     *
     * @param a_best the fittest program so far; may be {@code null}
     * @param gen    the generation number shown in the progress line
     *
     * @author Hakan Kjellerstrand (originally by Klaus Meffert)
     */
    public static void myOutputSolution(IGPProgram a_best, int gen) {
        String freeMB = SystemKit.niceMemory(SystemKit.getFreeMemoryMB());
        System.out
                .println("Evolving generation " + (gen) + "/" + numEvolutions + ", memory free: " + freeMB + " MB");

        if (a_best == null) {
            System.out.println("No best solution (null)");
            return;
        }

        double fitnessOfBest = a_best.getFitnessValue();
        if (Double.isInfinite(fitnessOfBest)) {
            System.out.println("No best solution (infinite)");
            return;
        }

        System.out.println("Best solution fitness: " + NumberKit.niceDecimalNumber(fitnessOfBest, 2));
        System.out.println("Best solution: " + a_best.toStringNorm(0));

        // Collect the depth of every chromosome, separated by " / ".
        int chromCount = a_best.size();
        StringBuilder depthText = new StringBuilder();
        for (int idx = 0; idx < chromCount; idx++) {
            if (idx > 0) {
                depthText.append(" / ");
            }
            depthText.append(a_best.getChromosome(idx).getDepth(0));
        }

        String label = (chromCount == 1) ? "Depth of chrom: " : "Depths of chroms: ";
        System.out.println(label + depthText);
    }
}