tr.gov.ulakbim.jDenetX.streams.generators.AgrawalGenerator.java Source code

Java tutorial

Introduction

Here is the source code for tr.gov.ulakbim.jDenetX.streams.generators.AgrawalGenerator.java

Source

/*
 *    AgrawalGenerator.java
 *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 *    @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package tr.gov.ulakbim.jDenetX.streams.generators;

import tr.gov.ulakbim.jDenetX.core.InstancesHeader;
import tr.gov.ulakbim.jDenetX.core.ObjectRepository;
import tr.gov.ulakbim.jDenetX.options.AbstractOptionHandler;
import tr.gov.ulakbim.jDenetX.options.FlagOption;
import tr.gov.ulakbim.jDenetX.options.FloatOption;
import tr.gov.ulakbim.jDenetX.options.IntOption;
import tr.gov.ulakbim.jDenetX.streams.InstanceStream;
import tr.gov.ulakbim.jDenetX.tasks.TaskMonitor;
import weka.core.*;

import java.util.Random;

// Generator described in paper:
//   Rakesh Agrawal, Tomasz Imielinski, and Arun Swami,
//    "Database Mining: A Performance Perspective", 
//     IEEE Transactions on Knowledge and Data Engineering,
//      5(6), December 1993.
// Public C source code available at:

//   http://www.almaden.ibm.com/cs/projects/iis/hdb/Projects/data_mining/datasets/syndata.html
//
// Notes:
// The built in functions are based on the paper (page 924),
//  which turn out to be functions pred20 thru pred29 in the public C implementation
// Perturbation function works like C implementation rather than description in paper
/**
 * Endless, restartable stream of synthetic loan-application instances.
 *
 * <p>Each instance has nine predictive attributes (salary, commission, age,
 * elevel, car, zipcode, hvalue, hyears, loan) and a binary class attribute
 * with labels {@code groupA} (index 0) and {@code groupB} (index 1). The class
 * is assigned by one of ten built-in functions selected through
 * {@link #functionOption}; numeric attributes can afterwards be perturbed with
 * uniform noise controlled by {@link #peturbFractionOption}.
 *
 * <p>The class label is always computed from the unperturbed values, so noise
 * changes the attribute values that are emitted but never the label.
 */
public class AgrawalGenerator extends AbstractOptionHandler implements InstanceStream {

    private static final long serialVersionUID = 1L;

    /**
     * Largest house value the generator can produce: zipcode index 0 gives a
     * factor of 9, the base unit is 100000 and the random multiplier is below 1.5.
     * Used as the upper clamp when perturbing {@code hvalue}.
     */
    private static final double MAX_HOUSE_VALUE = 9.0 * 100000.0 * 1.5;

    @Override
    public String getPurposeString() {
        return "Generates one of ten different pre-defined loan functions.";
    }

    /** Index (1 to 10) of the classification function used to label instances. */
    public IntOption functionOption = new IntOption("function", 'f',
            "Classification function used, as defined in the original paper.", 1, 1, 10);

    /** Seed for the random generator that produces the attribute values. */
    public IntOption instanceRandomSeedOption = new IntOption("instanceRandomSeed", 'i',
            "Seed for random generation of instances.", 1);

    /** Fraction of an attribute's range used as the maximum noise amplitude. */
    public FloatOption peturbFractionOption = new FloatOption("peturbFraction", 'p',
            "The amount of peturbation (noise) introduced to numeric values.", 0.05, 0.0, 1.0);

    /** When set, instances are emitted with strictly alternating class labels. */
    public FlagOption balanceClassesOption = new FlagOption("balanceClasses", 'b',
            "Balance the number of instances of each class.");

    /**
     * A labelling rule: maps the (unperturbed) attribute values of one
     * applicant to class index 0 or 1.
     */
    protected interface ClassFunction {
        int determineClass(double salary, double commission, int age, int elevel, int car, int zipcode,
                double hvalue, int hyears, double loan);
    }

    /**
     * Tests whether {@code value} lies in the closed interval {@code [low, high]}.
     */
    private static boolean between(double low, double value, double high) {
        return (low <= value) && (value <= high);
    }

    /**
     * The ten built-in labelling rules; entry {@code i} implements function
     * {@code i + 1}. Every rule returns 0 when its condition holds and 1 otherwise.
     */
    protected static ClassFunction[] classificationFunctions = {
            // function 1: depends on age only
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    return ((age < 40) || (60 <= age)) ? 0 : 1;
                }
            },
            // function 2: salary band chosen by age bracket
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    if (age < 40) {
                        return between(50000, salary, 100000) ? 0 : 1;
                    } else if (age < 60) { // 40 <= age < 60
                        return between(75000, salary, 125000) ? 0 : 1;
                    } else { // age >= 60
                        return between(25000, salary, 75000) ? 0 : 1;
                    }
                }
            },
            // function 3: education level set chosen by age bracket
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    if (age < 40) {
                        return ((elevel == 0) || (elevel == 1)) ? 0 : 1;
                    } else if (age < 60) { // 40 <= age < 60
                        return ((elevel == 1) || (elevel == 2) || (elevel == 3)) ? 0 : 1;
                    } else { // age >= 60
                        return ((elevel == 2) || (elevel == 3) || (elevel == 4)) ? 0 : 1;
                    }
                }
            },
            // function 4: salary band chosen by age bracket and education level
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    if (age < 40) {
                        if ((elevel == 0) || (elevel == 1)) {
                            return between(25000, salary, 75000) ? 0 : 1;
                        }
                        return between(50000, salary, 100000) ? 0 : 1;
                    } else if (age < 60) { // 40 <= age < 60
                        if ((elevel == 1) || (elevel == 2) || (elevel == 3)) {
                            return between(50000, salary, 100000) ? 0 : 1;
                        }
                        return between(75000, salary, 125000) ? 0 : 1;
                    } else { // age >= 60
                        if ((elevel == 2) || (elevel == 3) || (elevel == 4)) {
                            return between(50000, salary, 100000) ? 0 : 1;
                        }
                        return between(25000, salary, 75000) ? 0 : 1;
                    }
                }
            },
            // function 5: loan band chosen by age bracket and salary band
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    if (age < 40) {
                        if (between(50000, salary, 100000)) {
                            return between(100000, loan, 300000) ? 0 : 1;
                        }
                        return between(200000, loan, 400000) ? 0 : 1;
                    } else if (age < 60) { // 40 <= age < 60
                        if (between(75000, salary, 125000)) {
                            return between(200000, loan, 400000) ? 0 : 1;
                        }
                        return between(300000, loan, 500000) ? 0 : 1;
                    } else { // age >= 60
                        if (between(25000, salary, 75000)) {
                            return between(300000, loan, 500000) ? 0 : 1;
                        }
                        return between(100000, loan, 300000) ? 0 : 1;
                    }
                }
            },
            // function 6: like function 2 but on salary + commission
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    double totalSalary = salary + commission;
                    if (age < 40) {
                        return between(50000, totalSalary, 100000) ? 0 : 1;
                    } else if (age < 60) { // 40 <= age < 60
                        return between(75000, totalSalary, 125000) ? 0 : 1;
                    } else { // age >= 60
                        return between(25000, totalSalary, 75000) ? 0 : 1;
                    }
                }
            },
            // function 7: disposable income after loan repayment
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    double disposable = (2.0 * (salary + commission) / 3.0 - loan / 5.0 - 20000.0);
                    return disposable > 0 ? 0 : 1;
                }
            },
            // function 8: disposable income after an education-level deduction
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * elevel - 20000.0);
                    return disposable > 0 ? 0 : 1;
                }
            },
            // function 9: disposable income after education-level and loan deductions
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * elevel - loan / 5.0
                            - 10000.0);
                    return disposable > 0 ? 0 : 1;
                }
            },
            // function 10: disposable income including home equity
            new ClassFunction() {
                public int determineClass(double salary, double commission, int age, int elevel, int car,
                        int zipcode, double hvalue, int hyears, double loan) {
                    // equity only accrues once the house has been owned for 20 years
                    double equity = 0.0;
                    if (hyears >= 20) {
                        equity = hvalue * (hyears - 20.0) / 10.0;
                    }
                    double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * elevel + equity / 5.0
                            - 10000.0);
                    return disposable > 0 ? 0 : 1;
                }
            } };

    /** Header describing the ten attributes; built in {@link #prepareForUseImpl}. */
    protected InstancesHeader streamHeader;

    /** Source of all randomness; re-created from the seed option on every {@link #restart()}. */
    protected Random instanceRandom;

    /** Class-balancing state: whether the next emitted instance must have class index 0. */
    protected boolean nextClassShouldBeZero;

    /**
     * Builds the stream header (three numeric, three nominal, three numeric
     * attributes plus the nominal class, which is set as the last attribute)
     * and then resets the generator state via {@link #restart()}.
     */
    @Override
    protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
        // generate header
        FastVector attributes = new FastVector();
        attributes.addElement(new Attribute("salary"));
        attributes.addElement(new Attribute("commission"));
        attributes.addElement(new Attribute("age"));
        FastVector elevelLabels = new FastVector();
        for (int i = 0; i < 5; i++) {
            elevelLabels.addElement("level" + i);
        }
        attributes.addElement(new Attribute("elevel", elevelLabels));
        FastVector carLabels = new FastVector();
        for (int i = 0; i < 20; i++) {
            carLabels.addElement("car" + (i + 1));
        }
        attributes.addElement(new Attribute("car", carLabels));
        FastVector zipCodeLabels = new FastVector();
        for (int i = 0; i < 9; i++) {
            zipCodeLabels.addElement("zipcode" + (i + 1));
        }
        attributes.addElement(new Attribute("zipcode", zipCodeLabels));
        attributes.addElement(new Attribute("hvalue"));
        attributes.addElement(new Attribute("hyears"));
        attributes.addElement(new Attribute("loan"));
        FastVector classLabels = new FastVector();
        classLabels.addElement("groupA");
        classLabels.addElement("groupB");
        attributes.addElement(new Attribute("class", classLabels));
        this.streamHeader = new InstancesHeader(
                new Instances(getCLICreationString(InstanceStream.class), attributes, 0));
        this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
        restart();
    }

    /** Always returns -1: this generator never reports a remaining count. */
    public long estimatedRemainingInstances() {
        return -1;
    }

    /** Returns the header built by {@link #prepareForUseImpl}. */
    public InstancesHeader getHeader() {
        return this.streamHeader;
    }

    /** Always returns {@code true}: the stream never runs out. */
    public boolean hasMoreInstances() {
        return true;
    }

    /** Always returns {@code true}; see {@link #restart()}. */
    public boolean isRestartable() {
        return true;
    }

    /**
     * Generates the next instance.
     *
     * <p>Attribute values are drawn at random and labelled with the selected
     * classification function. If class balancing is enabled, candidates are
     * redrawn until one has the class that is due next, and the due class then
     * flips. Noise is applied after labelling, and only when the perturbation
     * fraction is greater than zero.
     *
     * @return a new instance attached to this stream's header
     */
    public Instance nextInstance() {
        double salary = 0, commission = 0, hvalue = 0, loan = 0;
        int age = 0, elevel = 0, car = 0, zipcode = 0, hyears = 0, group = 0;
        // the selected function cannot change while a single instance is generated
        ClassFunction function = classificationFunctions[this.functionOption.getValue() - 1];
        boolean desiredClassFound = false;
        while (!desiredClassFound) {
            // generate attributes (the order of random draws is significant for reproducibility)
            salary = 20000.0 + 130000.0 * this.instanceRandom.nextDouble();
            commission = (salary >= 75000.0) ? 0 : (10000.0 + 65000.0 * this.instanceRandom.nextDouble());
            // true to c implementation:
            // if (instanceRandom.nextDouble() < 0.5 && salary < 75000.0)
            // commission = 10000.0 + 65000.0 * instanceRandom.nextDouble();
            age = 20 + this.instanceRandom.nextInt(61);
            elevel = this.instanceRandom.nextInt(5);
            car = this.instanceRandom.nextInt(20);
            zipcode = this.instanceRandom.nextInt(9);
            hvalue = (9.0 - zipcode) * 100000.0 * (0.5 + this.instanceRandom.nextDouble());
            hyears = 1 + this.instanceRandom.nextInt(30);
            loan = this.instanceRandom.nextDouble() * 500000.0;
            // determine class
            group = function.determineClass(salary, commission, age, elevel, car, zipcode, hvalue, hyears,
                    loan);
            if (!this.balanceClassesOption.isSet()) {
                desiredClassFound = true;
            } else {
                // balance the classes: accept only the class that is due, then flip
                int wantedGroup = this.nextClassShouldBeZero ? 0 : 1;
                if (group == wantedGroup) {
                    desiredClassFound = true;
                    this.nextClassShouldBeZero = !this.nextClassShouldBeZero;
                } // else keep searching
            }
        }
        // perturb values
        if (this.peturbFractionOption.getValue() > 0.0) {
            salary = perturbValue(salary, 20000, 150000);
            if (commission > 0) {
                commission = perturbValue(commission, 10000, 75000);
            }
            age = (int) Math.round(perturbValue(age, 20, 80));
            // Noise amplitude scales with the zipcode-dependent range; the clamp must
            // admit the largest value the generator can produce, otherwise nearly
            // every house value would collapse onto the upper bound.
            hvalue = perturbValue(hvalue, (9.0 - zipcode) * 100000.0, 0, MAX_HOUSE_VALUE);
            hyears = (int) Math.round(perturbValue(hyears, 1, 30));
            loan = perturbValue(loan, 0, 500000);
        }
        // construct instance
        InstancesHeader header = getHeader();
        Instance inst = new DenseInstance(header.numAttributes());
        inst.setValue(0, salary);
        inst.setValue(1, commission);
        inst.setValue(2, age);
        inst.setValue(3, elevel);
        inst.setValue(4, car);
        inst.setValue(5, zipcode);
        inst.setValue(6, hvalue);
        inst.setValue(7, hyears);
        inst.setValue(8, loan);
        inst.setDataset(header);
        inst.setClassValue(group);
        return inst;
    }

    /**
     * Perturbs {@code val} using the full width {@code max - min} as the noise range.
     *
     * @param val value to perturb
     * @param min lower clamp
     * @param max upper clamp
     * @return the perturbed value, clamped to {@code [min, max]}
     */
    protected double perturbValue(double val, double min, double max) {
        return perturbValue(val, max - min, min, max);
    }

    /**
     * Adds uniform noise in {@code [-range * f, range * f)} to {@code val}, where
     * {@code f} is the current perturbation fraction, and clamps the result.
     * Consumes exactly one draw from {@link #instanceRandom}.
     *
     * @param val   value to perturb
     * @param range width that the perturbation fraction is applied to
     * @param min   lower clamp
     * @param max   upper clamp
     * @return the perturbed value, clamped to {@code [min, max]}
     */
    protected double perturbValue(double val, double range, double min, double max) {
        val += range * (2.0 * (this.instanceRandom.nextDouble() - 0.5)) * this.peturbFractionOption.getValue();
        if (val < min) {
            val = min;
        } else if (val > max) {
            val = max;
        }
        return val;
    }

    /**
     * Resets the stream: re-seeds the random generator from the seed option and
     * resets class balancing so that the first balanced instance has class index 1.
     */
    public void restart() {
        this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue());
        this.nextClassShouldBeZero = false;
    }

    /** Appends nothing: this generator provides no textual description. */
    public void getDescription(StringBuilder sb, int indent) {
        // intentionally empty
    }

}