List of usage examples for weka.core Instances enumerateInstances
publicEnumeration<Instance> enumerateInstances()
From source file:iris.ID3.java
private Instances[] makeBins(Instances instances, Attribute att) { // Create array of bins based on numValues in Attribute parameter Instances[] bins = new Instances[att.numValues()]; for (int i = 0; i < att.numValues(); i++) { bins[i] = new Instances(instances, instances.numInstances()); }/*from www. j a va 2s. c o m*/ // Create pointer to first instance Enumeration instanceEnum = instances.enumerateInstances(); while (instanceEnum.hasMoreElements()) { // Create new instance from the one pointer is pointing at Instance oneInstance = (Instance) instanceEnum.nextElement(); // Add instance to the proper bin bins[(int) oneInstance.value(att)].add(oneInstance); } // Compactify for (int i = 0; i < bins.length; i++) { bins[i].compactify(); } return bins; }
From source file:j48.BinC45Split.java
License:Open Source License
/** * Creates split on enumerated attribute. * * @exception Exception if something goes wrong *//*from w w w.j av a2 s . c o m*/ private void handleEnumeratedAttribute(Instances trainInstances) throws Exception { Distribution newDistribution, secondDistribution; int numAttValues; double currIG, currGR; Instance instance; int i; numAttValues = trainInstances.attribute(m_attIndex).numValues(); newDistribution = new Distribution(numAttValues, trainInstances.numClasses()); // Only Instances with known values are relevant. Enumeration enu = trainInstances.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (!instance.isMissing(m_attIndex)) newDistribution.add((int) instance.value(m_attIndex), instance); } m_distribution = newDistribution; // For all values for (i = 0; i < numAttValues; i++) { if (Utils.grOrEq(newDistribution.perBag(i), m_minNoObj)) { secondDistribution = new Distribution(newDistribution, i); // Check if minimum number of Instances in the two // subsets. if (secondDistribution.check(m_minNoObj)) { m_numSubsets = 2; currIG = m_infoGainCrit.splitCritValue(secondDistribution, m_sumOfWeights); currGR = m_gainRatioCrit.splitCritValue(secondDistribution, m_sumOfWeights, currIG); if ((i == 0) || Utils.gr(currGR, m_gainRatio)) { m_gainRatio = currGR; m_infoGain = currIG; m_splitPoint = (double) i; m_distribution = secondDistribution; } } } } }
From source file:j48.BinC45Split.java
License:Open Source License
/** * Creates split on numeric attribute./*from www . j a v a2s .co m*/ * * @exception Exception if something goes wrong */ private void handleNumericAttribute(Instances trainInstances) throws Exception { int firstMiss; int next = 1; int last = 0; int index = 0; int splitIndex = -1; double currentInfoGain; double defaultEnt; double minSplit; Instance instance; int i; // Current attribute is a numeric attribute. m_distribution = new Distribution(2, trainInstances.numClasses()); // Only Instances with known values are relevant. Enumeration enu = trainInstances.enumerateInstances(); i = 0; while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (instance.isMissing(m_attIndex)) break; m_distribution.add(1, instance); i++; } firstMiss = i; // Compute minimum number of Instances required in each // subset. minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses()); if (Utils.smOrEq(minSplit, m_minNoObj)) minSplit = m_minNoObj; else if (Utils.gr(minSplit, 25)) minSplit = 25; // Enough Instances with known values? if (Utils.sm((double) firstMiss, 2 * minSplit)) return; // Compute values of criteria for all possible split // indices. defaultEnt = m_infoGainCrit.oldEnt(m_distribution); while (next < firstMiss) { if (trainInstances.instance(next - 1).value(m_attIndex) + 1e-5 < trainInstances.instance(next) .value(m_attIndex)) { // Move class values for all Instances up to next // possible split point. m_distribution.shiftRange(1, 0, trainInstances, last, next); // Check if enough Instances in each subset and compute // values for criteria. if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit)) { currentInfoGain = m_infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt); if (Utils.gr(currentInfoGain, m_infoGain)) { m_infoGain = currentInfoGain; splitIndex = next - 1; } index++; } last = next; } next++; } // Was there any useful split? if (index == 0) return; // Compute modified information gain for best split. m_infoGain = m_infoGain - (Utils.log2(index) / m_sumOfWeights); if (Utils.smOrEq(m_infoGain, 0)) return; // Set instance variables' values to values for // best split. m_numSubsets = 2; m_splitPoint = (trainInstances.instance(splitIndex + 1).value(m_attIndex) + trainInstances.instance(splitIndex).value(m_attIndex)) / 2; // In case we have a numerical precision problem we need to choose the // smaller value if (m_splitPoint == trainInstances.instance(splitIndex + 1).value(m_attIndex)) { m_splitPoint = trainInstances.instance(splitIndex).value(m_attIndex); } // Restore distributioN for best split. m_distribution = new Distribution(2, trainInstances.numClasses()); m_distribution.addRange(0, trainInstances, 0, splitIndex + 1); m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss); // Compute modified gain ratio for best split. m_gainRatio = m_gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain); }
From source file:j48.BinC45Split.java
License:Open Source License
/** * Sets split point to greatest value in given data smaller or equal to * old split point.//www . j a v a2 s. co m * (C4.5 does this for some strange reason). */ public final void setSplitPoint(Instances allInstances) { double newSplitPoint = -Double.MAX_VALUE; double tempValue; Instance instance; if ((!allInstances.attribute(m_attIndex).isNominal()) && (m_numSubsets > 1)) { Enumeration enu = allInstances.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (!instance.isMissing(m_attIndex)) { tempValue = instance.value(m_attIndex); if (Utils.gr(tempValue, newSplitPoint) && Utils.smOrEq(tempValue, m_splitPoint)) newSplitPoint = tempValue; } } m_splitPoint = newSplitPoint; } }
From source file:j48.C45Split.java
License:Open Source License
/** * Creates split on enumerated attribute. * //w w w .java 2s . c o m * @exception Exception * if something goes wrong */ private void handleEnumeratedAttribute(Instances trainInstances) throws Exception { Instance instance; m_distribution = new Distribution(m_complexityIndex, trainInstances.numClasses()); // Only Instances with known values are relevant. Enumeration enu = trainInstances.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (!instance.isMissing(m_attIndex)) m_distribution.add((int) instance.value(m_attIndex), instance); } // Check if minimum number of Instances in at least two // subsets. if (m_distribution.check(m_minNoObj)) { m_numSubsets = m_complexityIndex; m_infoGain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights); m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain); } }
From source file:j48.C45Split.java
License:Open Source License
/** * Creates split on numeric attribute./*ww w . ja v a 2 s . com*/ * * @exception Exception * if something goes wrong */ private void handleNumericAttribute(Instances trainInstances) throws Exception { int firstMiss; int next = 1; int last = 0; int splitIndex = -1; double currentInfoGain; double defaultEnt; double minSplit; Instance instance; int i; // Current attribute is a numeric attribute. m_distribution = new Distribution(2, trainInstances.numClasses()); // Only Instances with known values are relevant. Enumeration enu = trainInstances.enumerateInstances(); i = 0; while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (instance.isMissing(m_attIndex)) break; m_distribution.add(1, instance); i++; } firstMiss = i; // Compute minimum number of Instances required in each // subset. minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses()); if (Utils.smOrEq(minSplit, m_minNoObj)) minSplit = m_minNoObj; else if (Utils.gr(minSplit, 25)) minSplit = 25; // Enough Instances with known values? if (Utils.sm((double) firstMiss, 2 * minSplit)) return; // Compute values of criteria for all possible split // indices. defaultEnt = infoGainCrit.oldEnt(m_distribution); while (next < firstMiss) { if (trainInstances.instance(next - 1).value(m_attIndex) + 1e-5 < trainInstances.instance(next) .value(m_attIndex)) { // Move class values for all Instances up to next // possible split point. m_distribution.shiftRange(1, 0, trainInstances, last, next); // Check if enough Instances in each subset and compute // values for criteria. if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit)) { currentInfoGain = infoGainCrit.splitCritValue1(m_distribution, m_sumOfWeights, defaultEnt, rrrrr); if (Utils.gr(currentInfoGain, m_infoGain)) { m_infoGain = currentInfoGain; splitIndex = next - 1; } m_index++; } last = next; } next++; } // Was there any useful split? if (m_index == 0) return; // Compute modified information gain for best split. m_infoGain = m_infoGain - (Utils.log2(m_index) / m_sumOfWeights); if (Utils.smOrEq(m_infoGain, 0)) return; // Set instance variables' values to values for // best split. m_numSubsets = 2; m_splitPoint = (trainInstances.instance(splitIndex + 1).value(m_attIndex) + trainInstances.instance(splitIndex).value(m_attIndex)) / 2; // In case we have a numerical precision problem we need to choose the // smaller value if (m_splitPoint == trainInstances.instance(splitIndex + 1).value(m_attIndex)) { m_splitPoint = trainInstances.instance(splitIndex).value(m_attIndex); } // Restore distributioN for best split. m_distribution = new Distribution(2, trainInstances.numClasses()); m_distribution.addRange(0, trainInstances, 0, splitIndex + 1); m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss); // Compute modified gain ratio for best split. m_gainRatio = gainRatioCrit.splitCritValue1(m_distribution, m_sumOfWeights, m_infoGain, lllll); }
From source file:j48.C45Split.java
License:Open Source License
/** * Sets split point to greatest value in given data smaller or equal to old * split point. (C4.5 does this for some strange reason). *//*from w w w.j a v a 2s . c o m*/ public final void setSplitPoint(Instances allInstances) { double newSplitPoint = -Double.MAX_VALUE; double tempValue; Instance instance; if ((allInstances.attribute(m_attIndex).isNumeric()) && (m_numSubsets > 1)) { Enumeration enu = allInstances.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (!instance.isMissing(m_attIndex)) { tempValue = instance.value(m_attIndex); if (Utils.gr(tempValue, newSplitPoint) && Utils.smOrEq(tempValue, m_splitPoint)) newSplitPoint = tempValue; } } m_splitPoint = newSplitPoint; } }
From source file:j48.Distribution.java
License:Open Source License
/** * Creates a distribution with only one bag according * to instances in source.//from w w w.ja v a2 s .c o m * * @exception Exception if something goes wrong */ public Distribution(Instances source) throws Exception { m_perClassPerBag = new double[1][0]; m_perBag = new double[1]; totaL = 0; m_perClass = new double[source.numClasses()]; m_perClassPerBag[0] = new double[source.numClasses()]; Enumeration enu = source.enumerateInstances(); while (enu.hasMoreElements()) add(0, (Instance) enu.nextElement()); }
From source file:j48.Distribution.java
License:Open Source License
/** * Creates a distribution according to given instances and * split model./*from ww w . ja va2s . co m*/ * * @exception Exception if something goes wrong */ public Distribution(Instances source, ClassifierSplitModel modelToUse) throws Exception { int index; Instance instance; double[] weights; m_perClassPerBag = new double[modelToUse.numSubsets()][0]; m_perBag = new double[modelToUse.numSubsets()]; totaL = 0; m_perClass = new double[source.numClasses()]; for (int i = 0; i < modelToUse.numSubsets(); i++) m_perClassPerBag[i] = new double[source.numClasses()]; Enumeration enu = source.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); index = modelToUse.whichSubset(instance); if (index != -1) add(index, instance); else { weights = modelToUse.weights(instance); addWeights(instance, weights); } } }
From source file:j48.Distribution.java
License:Open Source License
/** * Adds all instances with unknown values for given attribute, weighted * according to frequency of instances in each bag. * * @exception Exception if something goes wrong */// ww w. j a v a 2 s .com public final void addInstWithUnknown(Instances source, int attIndex) throws Exception { double[] probs; double weight, newWeight; int classIndex; Instance instance; int j; probs = new double[m_perBag.length]; for (j = 0; j < m_perBag.length; j++) { if (Utils.eq(totaL, 0)) { probs[j] = 1.0 / probs.length; } else { probs[j] = m_perBag[j] / totaL; } } Enumeration enu = source.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (instance.isMissing(attIndex)) { classIndex = (int) instance.classValue(); weight = instance.weight(); m_perClass[classIndex] = m_perClass[classIndex] + weight; totaL = totaL + weight; for (j = 0; j < m_perBag.length; j++) { newWeight = probs[j] * weight; m_perClassPerBag[j][classIndex] = m_perClassPerBag[j][classIndex] + newWeight; m_perBag[j] = m_perBag[j] + newWeight; } } } }