List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:myclassifier.myC45Pack.SplitModel.java
/**
 * Returns the text used for the left-hand side of this split's condition.
 *
 * @param data dataset from which the attribute at {@code attribIndex} is read
 * @return the name of the attribute at {@code attribIndex}
 */
@Override
public String leftSide(Instances data) {
    final String attributeName = data.attribute(attribIndex).name();
    return attributeName;
}
From source file:myclassifier.myC45Pack.SplitModel.java
/**
 * Returns the text used for the right-hand side of this split's condition
 * for one branch.
 *
 * <p>For a nominal split attribute the result is {@code " = "} followed by
 * the attribute value at position {@code index}. Otherwise branch 0 yields
 * {@code " <= "} and every other branch yields {@code " > "}, each followed
 * by {@code splitPointValue} formatted with
 * {@code Utils.doubleToString(splitPointValue, 6)}.
 *
 * @param index branch index
 * @param data dataset from which the attribute at {@code attribIndex} is read
 * @return the right-hand side text for the given branch
 */
@Override
public String rightSide(int index, Instances data) {
    if (data.attribute(attribIndex).isNominal()) {
        return " = " + data.attribute(attribIndex).value(index);
    }
    String comparison = (index == 0) ? " <= " : " > ";
    return comparison + Utils.doubleToString(splitPointValue, 6);
}
From source file:myclassifier.Util.java
public static Instances setAttributeThreshold(Instances data, Attribute att, int threshold) throws Exception { Instances temp = new Instances(data); Add filter = new Add(); filter.setAttributeName("thresholded " + att.name()); filter.setAttributeIndex(String.valueOf(att.index() + 2)); filter.setNominalLabels("<=" + threshold + ",>" + threshold); filter.setInputFormat(temp);//from w ww . j a va 2 s . c o m Instances thresholdedData = Filter.useFilter(data, filter); for (int i = 0; i < thresholdedData.numInstances(); i++) { if ((int) thresholdedData.instance(i).value(thresholdedData.attribute(att.name())) <= threshold) thresholdedData.instance(i).setValue(thresholdedData.attribute("thresholded " + att.name()), "<=" + threshold); else thresholdedData.instance(i).setValue(thresholdedData.attribute("thresholded " + att.name()), ">" + threshold); } thresholdedData = wekaCode.removeAttributes(thresholdedData, String.valueOf(att.index() + 1)); thresholdedData.renameAttribute(thresholdedData.attribute("thresholded " + att.name()), att.name()); return thresholdedData; }
From source file:myclassifier.Util.java
/**
 * Replaces numeric attributes with thresholded nominal attributes.
 *
 * <p>For each numeric attribute, the distinct values (truncated to
 * {@code int}) are collected and sorted ascending. Every value except the
 * largest is tried as a threshold via {@code setAttributeThreshold}, each
 * candidate dataset is scored with {@code calculateIG}, and the candidate
 * selected by {@code Util.indexOfMax} becomes the working dataset for the
 * remaining attributes.
 *
 * <p>A numeric attribute with fewer than two distinct truncated values has
 * no candidate threshold and is left unchanged. Previously this case failed
 * with a negative array size or an out-of-range index.
 *
 * @param data dataset to convert; it is not modified, converted copies are returned
 * @return the dataset after all convertible numeric attributes were thresholded
 * @throws Exception if thresholding or scoring a candidate fails
 */
public static Instances toNominal(Instances data) throws Exception {
    for (int n = 0; n < data.numAttributes(); n++) {
        Attribute att = data.attribute(n);
        if (!att.isNumeric()) {
            continue;
        }

        HashSet<Integer> uniqueValues = new HashSet<>();
        for (int i = 0; i < data.numInstances(); ++i) {
            uniqueValues.add((int) data.instance(i).value(att));
        }

        List<Integer> dataValues = new ArrayList<>(uniqueValues);
        // Integer::compare honours the Comparator contract (returns 0 for equal keys).
        dataValues.sort(Integer::compare);

        // The largest value is never a candidate: nothing would fall above it.
        int candidateCount = dataValues.size() - 1;
        if (candidateCount < 1) {
            continue;
        }

        double[] infoGains = new double[candidateCount];
        Instances[] tempInstances = new Instances[candidateCount];
        for (int i = 0; i < candidateCount; ++i) {
            tempInstances[i] = setAttributeThreshold(data, att, dataValues.get(i));
            infoGains[i] = calculateIG(tempInstances[i], tempInstances[i].attribute(att.name()));
        }
        data = new Instances(tempInstances[Util.indexOfMax(infoGains)]);
    }
    return data;
}
From source file:myclassifier.wekaCode.java
/**
 * Builds one instance from raw string values, classifies it and prints the
 * predicted class label to standard output.
 *
 * <p>Only attribute positions {@code 0 .. data.numAttributes() - 2} are
 * filled from {@code attributes}; the last position is left unset
 * (presumably the class attribute - confirm against the caller). Values of
 * numeric attributes are parsed as doubles; all others are set as strings.
 *
 * @param attributes raw values, one per filled attribute position
 * @param classifiers classifier used to predict the class
 * @param data dataset that provides the header for the new instance
 * @throws Exception if classification fails
 */
public static void classifyUnseenData(String[] attributes, Classifier classifiers, Instances data) throws Exception {
    Instance newInstance = new Instance(data.numAttributes());
    newInstance.setDataset(data);

    final int filledCount = data.numAttributes() - 1;
    for (int i = 0; i < filledCount; i++) {
        boolean numeric = data.attribute(i).type() == Attribute.NUMERIC;
        if (!numeric) {
            newInstance.setValue(i, attributes[i]);
            continue;
        }
        double parsed = Double.parseDouble(attributes[i]);
        newInstance.setValue(i, parsed);
    }

    double clsLabel = classifiers.classifyInstance(newInstance);
    newInstance.setClassValue(clsLabel);
    String result = data.classAttribute().value((int) clsLabel);
    System.out.println("Hasil Classify Unseen Data Adalah: " + result);
}
From source file:myclusterer.MyKMeans.java
private Instance createCentroid(Instances members) { double[] vals = new double[members.numAttributes()]; double[][] nominalDists = new double[members.numAttributes()][]; double[] weightMissing = new double[members.numAttributes()]; double[] weightNonMissing = new double[members.numAttributes()]; for (int j = 0; j < members.numAttributes(); j++) { if (members.attribute(j).isNominal()) { nominalDists[j] = new double[members.attribute(j).numValues()]; }//from w w w . j a va2 s . com } for (int i = 0; i < members.numInstances(); ++i) { Instance inst = members.instance(i); for (int j = 0; j < members.numAttributes(); j++) { if (inst.isMissing(j)) { weightMissing[j] += inst.weight(); } else { weightNonMissing[j] += inst.weight(); if (members.attribute(j).isNumeric()) vals[j] += inst.weight() * inst.value(j); else nominalDists[j][(int) inst.value(j)] += inst.weight(); } } } for (int i = 0; i < members.numAttributes(); i++) { if (members.attribute(i).isNumeric()) { if (weightNonMissing[i] > 0) { vals[i] /= weightNonMissing[i]; } else { vals[i] = Instance.missingValue(); } } else { double max = -Double.MAX_VALUE; double maxIndex = -1; for (int j = 0; j < nominalDists[i].length; j++) { if (nominalDists[i][j] > max) { max = nominalDists[i][j]; maxIndex = j; } vals[i] = max < weightMissing[i] ? Instance.missingValue() : maxIndex; } } } return new Instance(1.0, vals); }
From source file:myclusterer.WekaCode.java
/**
 * Builds one instance from raw string values, assigns it to a cluster and
 * prints the label looked up for that cluster index to standard output.
 *
 * <p>Attribute positions {@code 0 .. data.numAttributes() - 2} are filled
 * from {@code attributes}: numeric attributes are parsed as doubles, all
 * others are set as strings. The cluster index returned by the clusterer is
 * stored as the instance's class value and used as an index into the values
 * of {@code data.classAttribute()}.
 * NOTE(review): this requires {@code data} to have a class attribute whose
 * value order matches the cluster numbering - confirm with the caller.
 *
 * @param attributes raw values, one per filled attribute position
 * @param clusterer clusterer used to assign the instance
 * @param data dataset that provides the header for the new instance
 * @throws Exception if clustering fails
 */
public static void classifyUnseenData(String[] attributes, Clusterer clusterer, Instances data) throws Exception {
    Instance newInstance = new Instance(data.numAttributes());
    newInstance.setDataset(data);

    int position = 0;
    while (position < data.numAttributes() - 1) {
        String raw = attributes[position];
        if (data.attribute(position).type() == Attribute.NUMERIC) {
            newInstance.setValue(position, Double.parseDouble(raw));
        } else {
            newInstance.setValue(position, raw);
        }
        position++;
    }

    double clsLabel = clusterer.clusterInstance(newInstance);
    newInstance.setClassValue(clsLabel);
    String result = data.classAttribute().value((int) clsLabel);
    System.out.println("Hasil Classify Unseen Data Adalah: " + result);
}
From source file:myID3.MyId3.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance/*from w w w.j a v a 2 s . co m*/ */ public void buildTree(Instances data) { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyId3[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyId3(); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:myid3andc45classifier.Model.MyC45.java
@Override public void buildClassifier(Instances data) throws Exception { getCapabilities().testWithFail(data); data = new Instances(data); data.deleteWithMissingClass();// w ww . j a v a 2 s . c o m Enumeration enumAtt = data.enumerateAttributes(); while (enumAtt.hasMoreElements()) { Attribute attr = (Attribute) enumAtt.nextElement(); if (attr.isNumeric()) { ArrayList<Double> mid = new ArrayList<Double>(); Instances savedData = null; double temp, max = Double.NEGATIVE_INFINITY; // TODO: split nominal data.sort(attr); for (int i = 0; i < data.numInstances() - 1; i++) { if (data.instance(i).classValue() != data.instance(i + 1).classValue()) { if (data.attribute(attr.name() + " " + (data.instance(i + 1).value(attr) + data.instance(i).value(attr)) / 2) == null) { data = convertInstances(data, attr, (data.instance(i + 1).value(attr) + data.instance(i).value(attr)) / 2); //temp = computeInfoGainRatio(newData, newData.attribute(newData.numAttributes()-1)); //System.out.println("attribute "+newData.attribute(newData.numAttributes()-1).name()); //if (temp > max) { // max = temp; // savedData = newData; //} } } } //Penanganan Missing Value AttributeStats attributeStats = data.attributeStats(attr.index()); double mean = attributeStats.numericStats.mean; if (Double.isNaN(mean)) mean = 0; // Replace missing value with mean Enumeration instEnumerate = data.enumerateInstances(); while (instEnumerate.hasMoreElements()) { Instance instance = (Instance) instEnumerate.nextElement(); if (instance.isMissing(attr.index())) { instance.setValue(attr.index(), mean); } } //data = new Instances(savedData); } else { //Penanganan Missing Value AttributeStats attributeStats = data.attributeStats(attr.index()); int maxIndex = 0; for (int i = 1; i < attr.numValues(); i++) { if (attributeStats.nominalCounts[maxIndex] < attributeStats.nominalCounts[i]) { maxIndex = i; } } // Replace missing value with max index Enumeration instEnumerate = data.enumerateInstances(); while 
(instEnumerate.hasMoreElements()) { Instance instance = (Instance) instEnumerate.nextElement(); if (instance.isMissing(attr.index())) { instance.setValue(attr.index(), maxIndex); } } } } makeMyC45Tree(data); }
From source file:myid3andc45classifier.Model.MyC45.java
public void makeMyC45Tree(Instances data) throws Exception { if (data.numInstances() == 0) { attribute = null;/* www . java 2s. co m*/ label = Instance.missingValue(); return; } //System.out.println("NEW"); double[] infoGainRatios = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); if (!att.isNumeric()) infoGainRatios[att.index()] = computeInfoGainRatio(data, att); else infoGainRatios[att.index()] = Double.NEGATIVE_INFINITY; //System.out.println(att.name() + " " + infoGainRatios[att.index()]); } // TODO: build the tree attribute = data.attribute(maxIndex(infoGainRatios)); //System.out.println(infoGainRatios[maxIndex(infoGainRatios)]); // Make leaf if information gain is zero. // Otherwise create successors. if (infoGainRatios[maxIndex(infoGainRatios)] <= epsilon || Double.isNaN(infoGainRatios[maxIndex(infoGainRatios)])) { attribute = null; double[] numClasses = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); numClasses[(int) inst.classValue()]++; } label = maxIndex(numClasses); classAttribute = data.classAttribute(); } else { classAttribute = data.classAttribute(); Instances[] splitData = splitInstancesByAttribute(data, attribute); Instances[] distrData = splitInstancesByAttribute(data, data.classAttribute()); distribution = new double[distrData.length]; for (int j = 0; j < distribution.length; j++) { distribution[j] = distrData[j].numInstances(); } successors = new MyC45[attribute.numValues()]; for (int j = 0; j < attribute.numValues(); j++) { successors[j] = new MyC45(); successors[j].buildClassifier(splitData[j]); } } // TODO: prune //pruneTree(data); }