List of usage examples for weka.core Instances enumerateAttributes
public Enumeration<Attribute> enumerateAttributes()
From source file:GrowTree.java
Attribute bestSplit(Instances D) { double imin = 1.0; Attribute fbest = null;/*w ww.j a va 2s . c o m*/ Enumeration enat = D.enumerateAttributes(); while (enat.hasMoreElements()) { Attribute a = (Attribute) enat.nextElement(); //split D into subsets d1 to dn based on values vi based on features Instances[] split = new Instances[a.numValues()]; for (int i = 0; i < a.numValues(); i++) { split[i] = new Instances(D, D.numInstances()); } Enumeration x = D.enumerateInstances(); while (x.hasMoreElements()) { Instance in = (Instance) x.nextElement(); split[(int) in.value(a)].add(in); } for (int i = 0; i < split.length; i++) { split[i].compactify(); } for (int i = 0; i < a.numValues(); i++) { if (imp(split[i]) < imin) { imin = imp(split[i]); fbest = a; //evaluate the best feature to make root } } } return fbest; }
From source file:ID3Chi.java
License:Open Source License
/** * Method for building an ID3Chi tree.//from w w w . jav a 2 s .co m * * @param data * the training data * @exception Exception * if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. /* if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } /**/ if (data.numInstances() == 0) { SetNullDistribution(data); } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); double entropyOfAllData = computeEntropy(data); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att, entropyOfAllData); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); double chiSquare = computeChiSquare(data, m_Attribute); int degreesOfFreedom = m_Attribute.numValues() - 1; ChiSquaredDistribution chi = new ChiSquaredDistribution(degreesOfFreedom); double threshold = chi.inverseCumulativeProbability(m_confidenceLevel); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { MakeALeaf(data); } else { // Discard unknown values for selected attribute //data.deleteWithMissing(m_Attribute); Instances[] subset = splitData(data, m_Attribute); if (CheckIfCanApplyChiSquare(subset) && (chiSquare <= threshold)) { MakeALeaf(data); return; } m_Successors = new ID3Chi[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new ID3Chi(this.m_confidenceLevel); m_Successors[j].m_Ratio = (double) subset[j].numInstances() / (double) data.numInstances(); m_Successors[j].makeTree(subset[j]); } } }
From source file:cerebro.Id3.java
License:Open Source License
/** * Method for building an Id3 tree./*w ww . j a v a 2 s.c o m*/ * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:classifiers.ComplexClassifier.java
@Override public void train(Instances inst) throws Exception { Knoten[] k = Model.getDieknoten();// w ww .j a va 2s . co m Enumeration<Attribute> enu = inst.enumerateAttributes(); int attindex = 0; while (enu.hasMoreElements()) { Attribute att = enu.nextElement(); if (k[attindex].hatEltern()) { switch (att.type()) { case Attribute.NUMERIC: { for (int i = 0; i < k[attindex].anzahlEltern(); i++) { Attribute a = inst.attribute(k[attindex].getEltern(i).getID()); int c = a.index(); switch (a.type()) { case Attribute.NUMERIC: list.add(attindex, (new NumericNumericDistribution(inst, attindex, c))); break; case (Attribute.NOMINAL): list.add(attindex, new NumericNominalDistribution(inst, attindex, c)); break; case (Attribute.STRING): list.add(attindex, new NumericNominalDistribution(inst, attindex, c)); break; default: throw new Exception("Attributetype unbekannt"); } } } break; case Attribute.NOMINAL: { for (int i = 0; i < k[attindex].anzahlEltern(); i++) { Attribute a = inst.attribute(k[attindex].getEltern(i).getID()); int c = a.index(); switch (a.type()) { case Attribute.NUMERIC: list.add(attindex, new NumericNominalDistribution(inst, attindex, c)); break; case (Attribute.NOMINAL): list.add(attindex, new NominalNominalDistribution(inst, attindex, c)); break; case (Attribute.STRING): list.add(attindex, new NominalNominalDistribution(inst, attindex, c)); break; default: { throw new Exception("Attributetype unbekannt"); } } } } break; } } else { switch (att.type()) { case Attribute.NUMERIC: list.add(attindex, new NumericDistribution(inst, attindex)); break; case Attribute.NOMINAL: list.add(attindex, new NominalDistribution(inst, attindex)); break; case Attribute.STRING: list.add(attindex, new NominalDistribution(inst, attindex)); break; default: throw new Exception("Attributetype unbekannt"); } } attindex++; } for (int i = 0; i < inst.numClasses(); i++) { for (int j = 0; j < inst.numInstances(); j++) { if (inst.instance(j).classValue() == i) { Classparam[i]++; } } } for 
(int i = 0; i < inst.numClasses(); i++) { Classparam[i] /= inst.numInstances(); } }
From source file:classifiers.ComplexClassifierZufall.java
/**
 * Trains the classifier: registers one distribution per attribute (or per
 * attribute/parent pair) and derives the class frequencies.
 *
 * <p>The i-th enumerated attribute is paired with the i-th node returned by
 * {@code Model.getDieknoten()}. Parentless nodes receive a numeric or nominal
 * distribution; nodes with parents receive, for each parent, the pairwise
 * distribution matching both attribute types. Finally each {@code Classparam}
 * entry is incremented per matching instance and divided by the instance count.
 *
 * @param inst the training instances
 * @throws Exception if an attribute or parent attribute has an unsupported type
 */
@Override
public void train(Instances inst) throws Exception {
    Knoten[] k = Model.getDieknoten();
    Enumeration<Attribute> enu = inst.enumerateAttributes();
    int attindex = 0;

    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();

        if (k[attindex].hatEltern()) {
            int ownType = att.type();
            boolean ownNumeric = ownType == Attribute.NUMERIC;

            // Only numeric and nominal attributes are modelled against parents.
            if (ownNumeric || ownType == Attribute.NOMINAL) {
                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {
                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());
                    int c = a.index();
                    int parentType = a.type();

                    if (parentType == Attribute.NUMERIC) {
                        if (ownNumeric) {
                            list.add(attindex, new NumericNumericDistribution(inst, attindex, c));
                        } else {
                            list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        }
                    } else if (parentType == Attribute.NOMINAL || parentType == Attribute.STRING) {
                        if (ownNumeric) {
                            list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        } else {
                            list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        }
                    } else {
                        throw new Exception("Attributetype unbekannt");
                    }
                }
            }
        } else {
            int ownType = att.type();
            if (ownType == Attribute.NUMERIC) {
                list.add(attindex, new NumericDistribution(inst, attindex));
            } else if (ownType == Attribute.NOMINAL || ownType == Attribute.STRING) {
                list.add(attindex, new NominalDistribution(inst, attindex));
            } else {
                throw new Exception("Attributetype unbekannt");
            }
        }
        attindex++;
    }

    // Tally instances per class value.
    for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = 0; j < inst.numInstances(); j++) {
            if (inst.instance(j).classValue() == i) {
                Classparam[i]++;
            }
        }
    }

    // Divide every tally by the number of instances.
    for (int i = 0; i < inst.numClasses(); i++) {
        Classparam[i] /= inst.numInstances();
    }
}
From source file:com.relationalcloud.main.Explanation.java
License:Open Source License
/** * @param args/*from w w w.j a v a 2 s.c o m*/ */ public static void main(String[] args) { // LOADING PROPERTY FILE AND DRIVER Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // Register jdbcDriver try { Class.forName(ini.getProperty("driver")); } catch (ClassNotFoundException e) { e.printStackTrace(); } // LOAD PROPERTIES FROM CONFIGURATION FILE String connection = ini.getProperty("conn"); String schemaname = ini.getProperty("schema"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); String txnLogTable = ini.getProperty("txnLogTable"); String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates"); int numPart = Integer.parseInt(ini.getProperty("numPartitions")); // Initialize the Justification Handler ExplanationHandler jh = new ExplanationHandler(ini); System.out.println("Loading and processing " + jh.schemaname + " traces... 
considering prop file :" + jh.dbPropertyFile); try { // CREATE A DB CONNEctioN Connection conn = DriverManager.getConnection(connection + schemaname, user, password); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable, numb_trans_to_process, schemaname, conn, schema); // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN for (String tableProcessed : wa.getAllTableNames()) { System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE " + tableProcessed); // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn); // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE if (data.numAttributes() < 2) { System.out.println("No transactions touches this table, nothing to be done."); continue; } // INSTANTIATE THE CLASSIFIER String[] options; options = new String[3]; options[0] = "-P"; options[1] = "-C"; options[2] = ini.getProperty("Explanation.j48PruningConfidence"); J48 classifier = new J48(); // new instance of tree classifier.setOptions(options); // set the options Boolean attributeFilter = true; // ATTRIBUTE FILTERING Instances newData; if (data.numClasses() > 1 && attributeFilter) { AttributeSelection filter = new AttributeSelection(); //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES //InfoGainAttributeEval eval = new InfoGainAttributeEval(); //Ranker search = new Ranker(); //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2"))); CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); filter.setEvaluator(eval); filter.setSearch(search); 
filter.setInputFormat(data); newData = Filter.useFilter(data, filter); } else { newData = data; } String atts = ""; Enumeration e = newData.enumerateAttributes(); ArrayList<String> attributesForPopulation = new ArrayList<String>(); while (e.hasMoreElements()) { String s = ((Attribute) e.nextElement()).name(); attributesForPopulation.add(s); atts += s + ", "; } atts = atts.substring(0, atts.length() - 2); System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to " + (newData.numAttributes() - 1) + " (" + atts + ")"); data = null; System.gc(); if (newData.numInstances() < 1) { System.err.println("The are no data in the table, skipping classification"); continue; } if (newData.numInstances() > 0) { if (newData.classAttribute().numValues() > 1) { // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES ExplanationHandler.trainClassifier(newData, classifier); if (classifier.measureNumLeaves() == 1) { int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance()); System.out.println( "The classifier decided to put all the tuplesi in the table in one partition: " + partitionvalue); if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) { jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation, conn); } } // POPULATING THE justifiedpartition column with the result of this // classifier if required else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) { jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn, numPart, newData.classAttribute().enumerateValues()); } } else { // easy case... the class attribute is unary!! 
int partitionvalue = ((int) newData.firstInstance() .value(newData.firstInstance().classIndex())); System.out.println("The table is all stored in one partition, no need to use classifier"); if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) { jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation, conn); } } } else throw new Exception("The Instances is empty"); } // SET HASH PARTITION / REPLICATED PARTITION if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) { jh.populateHashPartition(conn); } if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) { jh.populateReplicatedPartition(conn, Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate"))); } conn.close(); } catch (SQLException e) { e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:decisiontree.MyC45.java
/** * Method for building an C45 tree./*from w w w . jav a 2s . c om*/ * * @param instances the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances instances) throws Exception { // Check if no instances have reached this node. if (instances.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[instances.numClasses()]; return; } // Compute attribute with maximum gain ratio. double[] gainRatios = new double[instances.numAttributes()]; Enumeration attrEnum = instances.enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (attr.isNominal()) { gainRatios[attr.index()] = computeGainRatio(instances, attr); } else if (attr.isNumeric()) { gainRatios[attr.index()] = computeGainRatio(instances, attr, computeThreshold(instances, attr)); } } m_Attribute = instances.attribute(Utils.maxIndex(gainRatios)); // Make leaf if gain ratio is zero. // Otherwise create successors. if (Utils.eq(gainRatios[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[instances.numClasses()]; Enumeration instEnum = instances.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = instances.classAttribute(); } else { Instances[] splitData = null; int child = 0; if (m_Attribute.isNominal()) { child = m_Attribute.numValues(); splitData = splitData(instances, m_Attribute); } else if (m_Attribute.isNumeric()) { child = 2; splitData = splitData(instances, m_Attribute, computeThreshold(instances, m_Attribute)); } m_Successors = new MyC45[child]; for (int j = 0; j < child; j++) { m_Successors[j] = new MyC45(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:decisiontree.MyC45.java
/**
 * Fills in missing attribute values in place.
 *
 * <p>For each enumerated attribute, missing entries are overwritten with the index
 * of the most frequent value (nominal attributes, first maximum wins) or with the
 * mean (numeric attributes), both taken from {@code attributeStats}. Attributes of
 * any other kind are left untouched.
 *
 * @param data the instances to repair; this same object is modified and returned
 * @return {@code data}, after replacement
 * @throws Exception declared by the signature; nothing is thrown explicitly here
 */
private Instances handleMissingValues(Instances data) throws Exception {
    // No copy is made: the result is an alias of the argument.
    Instances filled = data;

    for (Enumeration<?> attributes = filled.enumerateAttributes(); attributes.hasMoreElements();) {
        Attribute attr = (Attribute) attributes.nextElement();
        AttributeStats stats = filled.attributeStats(attr.index());

        double replacement;
        if (attr.isNominal()) {
            // Most frequent nominal value.
            int modeIdx = 0;
            for (int v = 0; v < attr.numValues(); v++) {
                if (stats.nominalCounts[v] > stats.nominalCounts[modeIdx]) {
                    modeIdx = v;
                }
            }
            replacement = modeIdx;
        } else if (attr.isNumeric()) {
            replacement = stats.numericStats.mean;
        } else {
            continue;
        }

        // Overwrite every missing entry of this attribute.
        for (int row = 0; row < filled.numInstances(); row++) {
            Instance current = filled.instance(row);
            if (current.isMissing(attr.index())) {
                current.setValue(attr.index(), replacement);
            }
        }
    }
    return filled;
}
From source file:decisiontree.MyID3.java
private void makeTree(Instances data) { // Check if no instances have reached this node. if (data.numInstances() == 0) { splitAttr = null;//from ww w .j a v a 2 s . c om leafValue = Double.NaN; leafDist = new double[data.numClasses()]; return; } if (data.numDistinctValues(data.classIndex()) == 1) { leafValue = data.firstInstance().classValue(); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } splitAttr = data.attribute(maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[splitAttr.index()], 0)) { splitAttr = null; leafDist = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); leafDist[(int) inst.classValue()]++; } normalize(leafDist); leafValue = Utils.maxIndex(leafDist); classAttr = data.classAttribute(); } else { Instances[] splitData = splitData(data, splitAttr); child = new MyID3[splitAttr.numValues()]; for (int j = 0; j < splitAttr.numValues(); j++) { child[j] = new MyID3(); child[j].makeTree(splitData[j]); } } }
From source file:dewaweebtreeclassifier.Sujeong.java
public void buildTree(Instances instances) throws java.lang.Exception { if (instances.numAttributes() < 1) { throw new Exception("Data instances need to have minimum of 1 attribute."); } else if (instances.numAttributes() == 1) { this.value = instances.meanOrMode(instances.classIndex()); } else {//from w w w.j a v a 2 s .c om Enumeration attrs = instances.enumerateAttributes(); double informationGain = 0.0; while (attrs.hasMoreElements()) { Attribute attr = (Attribute) attrs.nextElement(); double tmpGain = computeGain(instances, attr); if (tmpGain > informationGain) { bestAttr = attr; informationGain = tmpGain; } } if (bestAttr != null) { double mode = instances.meanOrMode(instances.classIndex()); Instances[] chunks = splitInstancesOnAttribute(instances, bestAttr); children = new Sujeong[chunks.length]; for (int i = 0; i < chunks.length; ++i) { Instances chunk = chunks[i]; Sujeong child = new Sujeong(); children[i] = child; if (chunk.numInstances() > 0) child.buildTree(chunk); else child.value = mode; } } else { this.value = instances.meanOrMode(instances.classIndex()); } } }