List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static void main1(String[] args) throws Exception { FastVector atts;//from www. jav a2 s . com FastVector attsRel; FastVector attVals; FastVector attValsRel; Instances data; Instances dataRel; double[] vals; double[] valsRel; int i; // 1. set up attributes atts = new FastVector(); // - numeric atts.addElement(new Attribute("att1")); // - nominal attVals = new FastVector(); for (i = 0; i < 5; i++) attVals.addElement("val" + (i + 1)); atts.addElement(new Attribute("att2", attVals)); // - string atts.addElement(new Attribute("att3", (FastVector) null)); // - date atts.addElement(new Attribute("att4", "yyyy-MM-dd")); // - relational attsRel = new FastVector(); // -- numeric attsRel.addElement(new Attribute("att5.1")); // -- nominal attValsRel = new FastVector(); for (i = 0; i < 5; i++) attValsRel.addElement("val5." + (i + 1)); attsRel.addElement(new Attribute("att5.2", attValsRel)); dataRel = new Instances("att5", attsRel, 0); atts.addElement(new Attribute("att5", dataRel, 0)); // 2. create Instances object data = new Instances("MyRelation", atts, 0); // 3. fill with data // first instance vals = new double[data.numAttributes()]; // - numeric vals[0] = Math.PI; // - nominal vals[1] = attVals.indexOf("val3"); // - string vals[2] = data.attribute(2).addStringValue("This is a string!"); // - date vals[3] = data.attribute(3).parseDate("2001-11-09"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.PI + 1; valsRel[1] = attValsRel.indexOf("val5.3"); dataRel.add(new Instance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.PI + 2; valsRel[1] = attValsRel.indexOf("val5.2"); dataRel.add(new Instance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new Instance(1.0, vals)); // second instance vals = new double[data.numAttributes()]; // important: needs NEW array! 
// - numeric vals[0] = Math.E; // - nominal vals[1] = attVals.indexOf("val1"); // - string vals[2] = data.attribute(2).addStringValue("And another one!"); // - date vals[3] = data.attribute(3).parseDate("2000-12-01"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.E + 1; valsRel[1] = attValsRel.indexOf("val5.4"); dataRel.add(new Instance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.E + 2; valsRel[1] = attValsRel.indexOf("val5.1"); dataRel.add(new Instance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new Instance(1.0, vals)); // 4. output data System.out.println(data); }
From source file:machinelearningproject.RFTree.java
@Override public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; //choose random fraction int numAttr = instances.numAttributes(); int k = (int) round(sqrt(numAttr)); ArrayList<Integer> randomIdx = randomFraction(numAttr); for (int idx = 0; idx < k; idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }//from ww w. j av a 2 s. c o m } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { 
copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); tree.nodes.add(node); } } } return tree; }
From source file:machinelearningproject.Tree.java
public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }/*from w w w . j a va2s . com*/ } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); 
tree.nodes.add(node); } } } return tree; }
From source file:machinelearningq2.BasicNaiveBayesV1.java
/**
 * Performs Laplace (add-one) smoothing: seeds the count table with one entry
 * per (attribute value, class, value index) combination so no conditional
 * probability is ever zero — constructing a DataFound starts its count at 1.
 *
 * NOTE(review): the loop bound is numDistinctValues(j) (distinct values seen
 * in the data) but the value is read from attribute(j).value(i) (the declared
 * nominal value list); these can disagree when the data does not contain
 * every declared value — confirm against how DataFound is consumed.
 *
 * @param inst dataset; its last attribute is made the class attribute
 * @throws ParseException if an attribute value is not numeric text
 */
public void laplaceCorrection(Instances inst) throws ParseException {
    inst.setClassIndex(inst.numAttributes() - 1);
    NumberFormat parser = NumberFormat.getInstance();
    for (int classValue = 0; classValue < inst.numClasses(); classValue++) {
        // every non-class attribute...
        for (int attrIdx = 0; attrIdx < inst.numAttributes() - 1; attrIdx++) {
            // ...crossed with every value index
            for (int valueIdx = 0; valueIdx < inst.numDistinctValues(attrIdx); valueIdx++) {
                String attributeValue = inst.attribute(attrIdx).value(valueIdx);
                double numericValue = parser.parse(attributeValue).doubleValue();
                data.add(new DataFound(numericValue, classValue, valueIdx));
            }
        }
    }
}
From source file:machine_learing_clasifier.MyC45.java
@Override public void buildClassifier(Instances i) throws Exception { if (!i.classAttribute().isNominal()) { throw new Exception("Class not nominal"); }/*from w ww. j a v a2 s . com*/ //penanganan missing value for (int j = 0; j < i.numAttributes(); j++) { Attribute attr = i.attribute(j); for (int k = 0; k < i.numInstances(); k++) { Instance inst = i.instance(k); if (inst.isMissing(attr)) { inst.setValue(attr, fillMissingValue(i, attr)); //bisa dituning lagi performancenya } } } i = new Instances(i); i.deleteWithMissingClass(); makeTree(i); }
From source file:machine_learing_clasifier.MyC45.java
/**
 * Recursively grows one C4.5 node on the given data.
 *
 * The attribute with the highest information gain is chosen as the split;
 * numeric attributes are evaluated at their best binary threshold (via
 * BestContinousAttribute). If the best gain is zero the node becomes a leaf
 * holding the normalized class distribution; otherwise the data is split
 * (one child per nominal value, or two children around the threshold) and
 * children are built recursively.
 *
 * Fix over original: removed leftover debug System.out.println noise
 * ("huhu", branch labels, successor-array lengths) and merged the two
 * structurally identical successor-building branches.
 *
 * @param data instances for this node (class index must be set)
 * @throws Exception propagated from gain computation / child construction
 */
public void makeTree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        return;
    }
    // information gain per attribute; the class attribute's slot stays 0.0
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            if (att.isNominal()) {
                infoGains[att.index()] = computeInformationGain(data, att);
            } else {
                infoGains[att.index()] = computeInformationGainContinous(data, att,
                        BestContinousAttribute(data, att));
            }
        }
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    if (m_Attribute.isNumeric()) {
        // re-derive the threshold used for the winning numeric attribute
        numericAttThreshold = BestContinousAttribute(data, m_Attribute);
    }
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // leaf: store the normalized class distribution and majority class
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // split and recurse: nominal -> one child per value, numeric -> binary
        Instances[] splitData;
        if (m_Attribute.isNominal()) {
            splitData = splitData(data, m_Attribute);
        } else {
            splitData = splitDataContinous(data, m_Attribute, numericAttThreshold);
        }
        int childCount = m_Attribute.isNominal() ? m_Attribute.numValues() : 2;
        m_Successors = new MyC45[childCount];
        for (int j = 0; j < childCount; j++) {
            m_Successors[j] = new MyC45(head, this);
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:machine_learing_clasifier.MyID3.java
/**
 * Validates the dataset (nominal class, all-nominal attributes, no missing
 * values) and grows an ID3 tree on a copy with missing-class rows removed.
 *
 * @param i training instances
 * @throws Exception if the class or any attribute is non-nominal, or any
 *                   attribute value is missing
 */
@Override
public void buildClassifier(Instances i) throws Exception {
    if (!i.classAttribute().isNominal()) {
        throw new Exception("Class not nominal");
    }
    // ID3 handles neither numeric attributes nor missing values: reject both
    for (int attrIdx = 0; attrIdx < i.numAttributes(); attrIdx++) {
        Attribute attr = i.attribute(attrIdx);
        if (!attr.isNominal()) {
            throw new Exception("Attribute not nominal");
        }
        for (int instIdx = 0; instIdx < i.numInstances(); instIdx++) {
            if (i.instance(instIdx).isMissing(attr)) {
                throw new Exception("Missing value");
            }
        }
    }
    // work on a defensive copy; drop rows without a class label
    i = new Instances(i);
    i.deleteWithMissingClass();
    makeTree(i);
}
From source file:machine_learing_clasifier.MyID3.java
public void makeTree(Instances data) throws Exception { if (data.numInstances() == 0) { return;//from w w w. j a v a 2s .c o m } double[] infoGains = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { Attribute att = data.attribute(i); if (data.classIndex() != att.index()) { infoGains[att.index()] = computeInformationGain(data, att); } } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); //System.out.println("huhu = " + m_Attribute.toString()); if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { int inst = (int) data.instance(i).value(data.classAttribute()); m_Distribution[inst]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new MyID3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyID3(); m_Successors[j].buildClassifier(splitData[j]); } } }
From source file:mao.datamining.DataSetPair.java
private void doItOnce4All() { if (didIt)// w w w.j a v a2 s .co m return; didIt = true; try { //step 0, remove all those empty columns, which has more than 50% missing values Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName); orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1); Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1); MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString()); //step 0-1, to remove all columns which has more than half missing values Instances newData = orangeDataSet; RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues(); removeMissingValuesColumns.setM_maxMissingPercentage(50); removeMissingValuesColumns.setManualDeleteColumns(columns2Delete); removeMissingValuesColumns.setInputFormat(newData); newData = Filter.useFilter(newData, removeMissingValuesColumns); Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n" + newData.toSummaryString()); try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) { writer.write(newData.toString()); } //step 0-2 to transform those numeric columns to Nominal //to delete those instances with more than half missing values BufferedReader reader70 = new BufferedReader(new InputStreamReader( new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff"))); BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff"))); int columnNum = newData.numAttributes(); int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0; String line = null; int missingColumnNum = 0; while ((line = reader70.readLine()) != null) { missingColumnNum = 0; for (int i = 0; i < line.length(); i++) { if (line.charAt(i) == '?') 
missingColumnNum++; } if (missingColumnNum * 100 / columnNum < 50) { writerAfterDeleteRows.write(line); writerAfterDeleteRows.newLine(); } else { System.out.println("Delete Row: [" + line + "]"); if (line.endsWith("-1")) { deleteM1Num++; } else { delete1Num++; } } } System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num + ", delete class 1: " + delete1Num); reader70.close(); writerAfterDeleteRows.close(); //create sample files: createSampleDataSets(); } catch (Exception e) { Main.logging(null, e); } }
From source file:mao.datamining.DataSetPair.java
/**
 * Pre-processes the training data set:
 *   - RemoveUselessColumnsByMissingValues and row filtering (via doItOnce4All)
 *   - a resampling pass: the mode (under/over/none/matrix) selects which
 *     pre-built sample ARFF file is loaded
 *   - attribute selection: InfoGain+Ranker (mode A), CfsSubsetEval+
 *     LinearForwardSelection (mode B), or none
 * The processed data is written to trainFileName, the surviving attribute
 * names are collected into finalTrainAttrList, and the result becomes
 * finalTrainDataSet with the last attribute as class.
 */
private void processTrainRawData() {
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    finalTrainAttrList.clear();
    try {
        doItOnce4All();
        String sampleFilePath = null;
        // step 2: pick the pre-built sample file matching the resample mode
        // (produced with weka.filters.supervised.instance.SpreadSubsample)
        if (this.resampleMethod.equalsIgnoreCase(resampleUnder)) {
            System.out.println("Under Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterUnderSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleOver)) {
            System.out.println("Over Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterOverSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleNone)) {
            // no resampling: use the unsampled file
            System.out.println("None Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleMatrix)) {
            // cost-matrix mode also reads the unsampled file
            System.out.println("Matrix Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else {
            doNotSupport();
        }
        Instances newData = ConverterUtils.DataSource.read(sampleFilePath);
        newData.setClassIndex(newData.numAttributes() - 1);

        // step 3: select features
        AttributeSelection attrSelectionFilter = new AttributeSelection();
        ASEvaluation eval = null;
        ASSearch search = null;
        // mode A: information-gain ranking
        if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionA)) {
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            eval = new weka.attributeSelection.InfoGainAttributeEval();
            // equivalent CLI: weka.attributeSelection.Ranker -T 0.02 -N -1
            search = new Ranker();
            String rankerOptios[] = { "-T", "0.01", "-N", "-1" };
            if (resampleMethod.equalsIgnoreCase(resampleOver)) {
                // oversampled data gets a higher gain threshold
                rankerOptios[1] = "0.1";
            }
            ((Ranker) search).setOptions(rankerOptios);
            Main.logging("== Start to Select Features with InfoGainAttributeEval and Ranker");
        }
        // mode B: CFS subset evaluation with linear forward selection
        // (weka.attributeSelection.LinearForwardSelection -D 0 -N 5 -I -K 50 -T 0)
        else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionB)) {
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            eval = new CfsSubsetEval();
            search = new LinearForwardSelection();
            String linearOptios[] = { "-D", "0", "-N", "5", "-I", "-K", "50", "-T", "0" };
            ((LinearForwardSelection) search).setOptions(linearOptios);
            Main.logging("== Start to Select Features with CfsSubsetEval and LinearForwardSelection");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionNo)) {
            System.out.println("None Selection ssssssssssssssssssssssssssssssssssssss");
            Main.logging("No Feature Selection Method");
        } else {
            doNotSupport();
        }
        // eval stays null in "no selection" mode -> skip the filter entirely
        if (eval != null) {
            attrSelectionFilter.setEvaluator(eval);
            attrSelectionFilter.setSearch(search);
            attrSelectionFilter.setInputFormat(newData);
            newData = Filter.useFilter(newData, attrSelectionFilter);
        }
        Main.logging("== New Data After Selecting Features: ===\n" + newData.toSummaryString());
        // finally, write the final dataset to the file system
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.trainFileName)))) {
            writer.write(newData.toString());
        }
        // record the surviving attribute names
        int numAttributes = newData.numAttributes();
        for (int i = 0; i < numAttributes; i++) {
            String attrName = newData.attribute(i).name();
            finalTrainAttrList.add(attrName);
        }
        Main.logging(finalTrainAttrList.toString());
        // set the final train dataset; class = last attribute
        finalTrainDataSet = newData;
        finalTrainDataSet.setClassIndex(finalTrainDataSet.numAttributes() - 1);
        Main.logging("train dataset class attr: " + finalTrainDataSet.classAttribute().toString());
    } catch (Exception ex) {
        Main.logging(null, ex);
    }
}