Example usage for weka.core Instances enumerateAttributes

Introduction

In this page you can find the example usage for weka.core Instances enumerateAttributes.

Prototype

publicEnumeration<Attribute> enumerateAttributes()

Source Link

Document

Returns an enumeration of all the attributes.

Usage

From source file:GrowTree.java

Attribute bestSplit(Instances D) {
    double imin = 1.0;
    Attribute fbest = null;/*w ww.j a va 2s  .  c  o  m*/
    Enumeration enat = D.enumerateAttributes();
    while (enat.hasMoreElements()) {
        Attribute a = (Attribute) enat.nextElement();
        //split D into subsets d1 to dn based on values vi based on features
        Instances[] split = new Instances[a.numValues()];
        for (int i = 0; i < a.numValues(); i++) {
            split[i] = new Instances(D, D.numInstances());
        }
        Enumeration x = D.enumerateInstances();
        while (x.hasMoreElements()) {
            Instance in = (Instance) x.nextElement();
            split[(int) in.value(a)].add(in);
        }
        for (int i = 0; i < split.length; i++) {
            split[i].compactify();
        }
        for (int i = 0; i < a.numValues(); i++) {
            if (imp(split[i]) < imin) {
                imin = imp(split[i]);
                fbest = a; //evaluate the best feature to make root
            }
        }
    }
    return fbest;

}

From source file:ID3Chi.java

License:Open Source License

/**
 * Method for building an ID3Chi tree.//from w w  w  . jav a 2  s .co m
 *
 * @param data
 *            the training data
 * @exception Exception
 *                if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    /*
    if (data.numInstances() == 0) {
       m_Attribute = null;
       m_ClassValue = Instance.missingValue();
       m_Distribution = new double[data.numClasses()];
       return;
    }
    /**/
    if (data.numInstances() == 0) {
        SetNullDistribution(data);
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    double entropyOfAllData = computeEntropy(data);

    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att, entropyOfAllData);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    double chiSquare = computeChiSquare(data, m_Attribute);

    int degreesOfFreedom = m_Attribute.numValues() - 1;
    ChiSquaredDistribution chi = new ChiSquaredDistribution(degreesOfFreedom);
    double threshold = chi.inverseCumulativeProbability(m_confidenceLevel);

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        MakeALeaf(data);
    } else {
        // Discard unknown values for selected attribute
        //data.deleteWithMissing(m_Attribute);
        Instances[] subset = splitData(data, m_Attribute);

        if (CheckIfCanApplyChiSquare(subset) && (chiSquare <= threshold)) {
            MakeALeaf(data);
            return;
        }

        m_Successors = new ID3Chi[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new ID3Chi(this.m_confidenceLevel);
            m_Successors[j].m_Ratio = (double) subset[j].numInstances() / (double) data.numInstances();
            m_Successors[j].makeTree(subset[j]);
        }
    }
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Method for building an Id3 tree./*w  ww .  j a v  a 2 s.c o  m*/
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new Id3();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}

From source file:classifiers.ComplexClassifier.java

@Override
public void train(Instances inst) throws Exception {
    Knoten[] k = Model.getDieknoten();// w ww .j a va 2s .  co m
    Enumeration<Attribute> enu = inst.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();
        if (k[attindex].hatEltern()) {

            switch (att.type()) {
            case Attribute.NUMERIC: {

                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {
                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());
                    int c = a.index();
                    switch (a.type()) {
                    case Attribute.NUMERIC:
                        list.add(attindex, (new NumericNumericDistribution(inst, attindex, c)));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    default:
                        throw new Exception("Attributetype unbekannt");
                    }
                }

            }
                break;
            case Attribute.NOMINAL: {
                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {

                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());

                    int c = a.index();

                    switch (a.type()) {

                    case Attribute.NUMERIC:
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    default: {
                        throw new Exception("Attributetype unbekannt");
                    }
                    }
                }
            }
                break;
            }
        } else {

            switch (att.type()) {
            case Attribute.NUMERIC:

                list.add(attindex, new NumericDistribution(inst, attindex));
                break;
            case Attribute.NOMINAL:

                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            case Attribute.STRING:
                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            default:
                throw new Exception("Attributetype unbekannt");

            }
        }
        attindex++;
    }

    for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = 0; j < inst.numInstances(); j++) {
            if (inst.instance(j).classValue() == i) {
                Classparam[i]++;
            }

        }

    }

    for (int i = 0; i < inst.numClasses(); i++) {
        Classparam[i] /= inst.numInstances();
    }

}

From source file:classifiers.ComplexClassifierZufall.java

@Override
public void train(Instances inst) throws Exception {

    Knoten[] k = Model.getDieknoten();/*from w w w.j  a  v a 2 s. c o m*/
    Enumeration<Attribute> enu = inst.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();
        if (k[attindex].hatEltern()) {

            switch (att.type()) {
            case Attribute.NUMERIC: {

                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {
                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());
                    int c = a.index();
                    switch (a.type()) {
                    case Attribute.NUMERIC:
                        list.add(attindex, (new NumericNumericDistribution(inst, attindex, c)));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    default:
                        throw new Exception("Attributetype unbekannt");
                    }
                }

            }
                break;
            case Attribute.NOMINAL: {
                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {

                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());

                    int c = a.index();

                    switch (a.type()) {

                    case Attribute.NUMERIC:
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    default: {
                        throw new Exception("Attributetype unbekannt");
                    }
                    }
                }
            }
                break;
            }
        } else {

            switch (att.type()) {
            case Attribute.NUMERIC:

                list.add(attindex, new NumericDistribution(inst, attindex));
                break;
            case Attribute.NOMINAL:

                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            case Attribute.STRING:
                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            default:
                throw new Exception("Attributetype unbekannt");

            }
        }
        attindex++;
    }

    for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = 0; j < inst.numInstances(); j++) {
            if (inst.instance(j).classValue() == i) {
                Classparam[i]++;
            }

        }

    }

    for (int i = 0; i < inst.numClasses(); i++) {
        Classparam[i] /= inst.numInstances();
    }

}

From source file:com.relationalcloud.main.Explanation.java

License:Open Source License

/**
 * @param args/*from  w w  w.j  a v a  2 s.c  o  m*/
 */
public static void main(String[] args) {

    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");

    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");

    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler
    ExplanationHandler jh = new ExplanationHandler(ini);

    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    try {

        // CREATE A DB CONNEctioN
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }
            // INSTANTIATE THE CLASSIFIER
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;
            // ATTRIBUTE FILTERING
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();

                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();

                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            atts = atts.substring(0, atts.length() - 2);

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("The are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    if (classifier.measureNumLeaves() == 1) {

                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuplesi in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }

                    }

                    // POPULATING THE justifiedpartition column with the result of this
                    // classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }

                } else { // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");

        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }

        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:decisiontree.MyC45.java

/**
* Method for building an C45 tree./*from  w  w  w  .  jav  a  2s  . c om*/
*
* @param instances the training data
* @exception Exception if decision tree can't be built successfully
*/
private void makeTree(Instances instances) throws Exception {

    // Check if no instances have reached this node.
    if (instances.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[instances.numClasses()];
        return;
    }

    // Compute attribute with maximum gain ratio.
    double[] gainRatios = new double[instances.numAttributes()];
    Enumeration attrEnum = instances.enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
        Attribute attr = (Attribute) attrEnum.nextElement();
        if (attr.isNominal()) {
            gainRatios[attr.index()] = computeGainRatio(instances, attr);
        } else if (attr.isNumeric()) {
            gainRatios[attr.index()] = computeGainRatio(instances, attr, computeThreshold(instances, attr));
        }
    }
    m_Attribute = instances.attribute(Utils.maxIndex(gainRatios));

    // Make leaf if gain ratio is zero. 
    // Otherwise create successors.
    if (Utils.eq(gainRatios[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[instances.numClasses()];
        Enumeration instEnum = instances.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = instances.classAttribute();
    } else {
        Instances[] splitData = null;
        int child = 0;
        if (m_Attribute.isNominal()) {
            child = m_Attribute.numValues();
            splitData = splitData(instances, m_Attribute);
        } else if (m_Attribute.isNumeric()) {
            child = 2;
            splitData = splitData(instances, m_Attribute, computeThreshold(instances, m_Attribute));
        }
        m_Successors = new MyC45[child];
        for (int j = 0; j < child; j++) {
            m_Successors[j] = new MyC45();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}

From source file:decisiontree.MyC45.java

private Instances handleMissingValues(Instances data) throws Exception {
    Instances newData = data;
    Enumeration attrEnum = newData.enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
        Attribute attr = (Attribute) attrEnum.nextElement();
        AttributeStats attrStats = newData.attributeStats(attr.index());
        if (attr.isNominal()) {
            int maxIdx = 0;
            for (int i = 0; i < attr.numValues(); i++) {
                if (attrStats.nominalCounts[i] > attrStats.nominalCounts[maxIdx]) {
                    maxIdx = i;/* w ww  . jav  a 2  s.  co  m*/
                }
            }

            for (int i = 0; i < newData.numInstances(); i++) {
                if (newData.instance(i).isMissing(attr.index())) {
                    newData.instance(i).setValue(attr.index(), maxIdx);
                }
            }
        } else if (attr.isNumeric()) {
            double mean = attrStats.numericStats.mean;
            for (int i = 0; i < newData.numInstances(); i++) {
                if (newData.instance(i).isMissing(attr.index())) {
                    newData.instance(i).setValue(attr.index(), mean);
                }
            }
        }
    }

    return newData;
}

From source file:decisiontree.MyID3.java

private void makeTree(Instances data) {
    // Check if no instances have reached this node.  
    if (data.numInstances() == 0) {
        splitAttr = null;//from ww  w .j  a v  a 2 s .  c om
        leafValue = Double.NaN;
        leafDist = new double[data.numClasses()];
        return;
    }

    if (data.numDistinctValues(data.classIndex()) == 1) {
        leafValue = data.firstInstance().classValue();
        return;
    }

    // Compute attribute with maximum information gain.  
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    splitAttr = data.attribute(maxIndex(infoGains));

    // Make leaf if information gain is zero.   
    // Otherwise create successors.  
    if (Utils.eq(infoGains[splitAttr.index()], 0)) {
        splitAttr = null;
        leafDist = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            leafDist[(int) inst.classValue()]++;
        }
        normalize(leafDist);
        leafValue = Utils.maxIndex(leafDist);
        classAttr = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, splitAttr);
        child = new MyID3[splitAttr.numValues()];
        for (int j = 0; j < splitAttr.numValues(); j++) {
            child[j] = new MyID3();
            child[j].makeTree(splitData[j]);
        }
    }
}

From source file:dewaweebtreeclassifier.Sujeong.java

public void buildTree(Instances instances) throws java.lang.Exception {
    if (instances.numAttributes() < 1) {
        throw new Exception("Data instances need to have minimum of 1 attribute.");
    } else if (instances.numAttributes() == 1) {
        this.value = instances.meanOrMode(instances.classIndex());
    } else {//from   w w w.j a  v a  2  s .c  om
        Enumeration attrs = instances.enumerateAttributes();
        double informationGain = 0.0;
        while (attrs.hasMoreElements()) {
            Attribute attr = (Attribute) attrs.nextElement();
            double tmpGain = computeGain(instances, attr);
            if (tmpGain > informationGain) {
                bestAttr = attr;
                informationGain = tmpGain;
            }
        }
        if (bestAttr != null) {
            double mode = instances.meanOrMode(instances.classIndex());
            Instances[] chunks = splitInstancesOnAttribute(instances, bestAttr);
            children = new Sujeong[chunks.length];
            for (int i = 0; i < chunks.length; ++i) {
                Instances chunk = chunks[i];
                Sujeong child = new Sujeong();
                children[i] = child;
                if (chunk.numInstances() > 0)
                    child.buildTree(chunk);
                else
                    child.value = mode;
            }
        } else {
            this.value = instances.meanOrMode(instances.classIndex());
        }
    }
}