List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
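All of the examples below follow the same convention: load a dataset, then tell Weka which attribute is the class, since ARFF files do not mark one. A minimal sketch of that pattern (the file path is a placeholder):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // placeholder path
        // By convention the class attribute is the last column.
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}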
From source file: machinelearningq2.BasicNaiveBayesV1.java

/**
 * Performs Laplace correction to ensure there are no zero values in the
 * data. Creating a DataFound object ensures the count starts from 1.
 *
 * @param inst
 * @throws ParseException
 */
public void laplaceCorrection(Instances inst) throws ParseException {
    inst.setClassIndex(inst.numAttributes() - 1);
    for (int c = 0; c < inst.numClasses(); c++) {
        for (int j = 0; j < inst.numAttributes() - 1; j++) {
            for (int i = 0; i < inst.numDistinctValues(j); i++) {
                String attributeValue = inst.attribute(j).value(i);
                NumberFormat nf = NumberFormat.getInstance();
                double atval = nf.parse(attributeValue).doubleValue();
                DataFound d = new DataFound(atval, c, i);
                data.add(d);
            }
        }
    }
}
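For context on the comment above: Laplace (add-one) smoothing prevents a zero class-conditional probability from wiping out the whole naive Bayes product. A minimal sketch of the estimate it implies; the helper name is hypothetical and not part of BasicNaiveBayesV1:

// Hypothetical helper illustrating add-one (Laplace) smoothing:
// starting every count at 1 means no attribute value ever has
// probability zero, even if it never co-occurs with a class.
static double laplaceEstimate(int count, int classTotal, int numDistinctValues) {
    return (count + 1.0) / (classTotal + numDistinctValues);
}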
From source file: machinelearningq2.ExtendedNaiveBayes.java

/**
 * Build classifier will either build a Gaussian or a discrete classifier,
 * depending on user input.
 *
 * @param ins
 * @throws Exception
 */
@Override
public void buildClassifier(Instances ins) throws Exception {
    if ("d".equals(gausianOrDiscretise)) {
        buildDiscreteClassifier(ins);
    } else {
        countData = ins.size();
        // assigns the class position of the instance
        ins.setClassIndex(ins.numAttributes() - 1);
        classValueCounts = new int[ins.numClasses()];
        attributeMeans = new double[ins.numClasses()][ins.numAttributes() - 1];
        attributeVariance = new double[ins.numClasses()][ins.numAttributes() - 1];
        // store the values
        for (Instance line : ins) {
            double classValue = line.classValue();
            classValueCounts[(int) classValue]++;
            for (int i = 0; i < line.numAttributes() - 1; i++) {
                double attributeValue = line.value(i);
                attributeMeans[(int) classValue][i] += attributeValue;
                DataFound d = new DataFound(attributeValue, classValue, i);
                int index = data.indexOf(d);
                // index == -1 means this value/class pair hasn't been seen yet
                if (index == -1) {
                    data.add(d);
                } else {
                    data.get(index).incrementCount();
                }
            }
        }
        System.out.println("Attribute Totals: " + Arrays.deepToString(attributeMeans));
        // computes the means
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeMeans[j][i] = attributeMeans[j][i] / classValueCounts[j];
            }
        }
        // calculate the sample variance
        for (int i = 0; i < data.size(); i++) {
            double cv = data.get(i).getClassValue();
            double atIn = data.get(i).getAttributeIndex();
            double squareDifference = Math
                    .pow(data.get(i).getAttributeValue() - attributeMeans[(int) cv][(int) atIn], 2);
            attributeVariance[(int) cv][(int) atIn] += squareDifference;
        }
        for (int j = 0; j < classValueCounts.length; j++) {
            for (int i = 0; i < ins.numAttributes() - 1; i++) {
                attributeVariance[j][i] = attributeVariance[j][i] / (classValueCounts[j] - 1);
                // square root of the variance, so this actually stores the standard deviation
                attributeVariance[j][i] = Math.sqrt(attributeVariance[j][i]);
            }
        }
        System.out.println("Attribute Means: " + Arrays.deepToString(attributeMeans));
        System.out.println("Variance: " + Arrays.deepToString(attributeVariance));
    }
}
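The per-class means and standard deviations computed above are exactly what a Gaussian naive Bayes model needs at prediction time. A minimal sketch of the density a classifyInstance would evaluate per attribute; the helper is hypothetical, and note that after the Math.sqrt step, attributeVariance actually holds standard deviations:

// Hypothetical helper: Gaussian probability density of attribute value x
// under the per-class mean and standard deviation estimated above.
static double gaussianDensity(double x, double mean, double stdDev) {
    double exponent = -((x - mean) * (x - mean)) / (2.0 * stdDev * stdDev);
    return Math.exp(exponent) / (stdDev * Math.sqrt(2.0 * Math.PI));
}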
From source file: machinelearningq2.ExtendedNaiveBayes.java

/**
 * The method buildDiscreteClassifier discretizes the data and then builds a
 * classifier.
 *
 * @param ins
 * @throws Exception
 */
public void buildDiscreteClassifier(Instances ins) throws Exception {
    ins = discretize(ins);
    // assigns the class position of the instance
    ins.setClassIndex(ins.numAttributes() - 1);
    countData = ins.size();
    classValueCounts = new int[ins.numClasses()];
    // store the values
    for (Instance line : ins) {
        double classValue = line.classValue();
        classValueCounts[(int) classValue]++;
        for (int i = 0; i < line.numAttributes() - 1; i++) {
            double attributeValue = line.value(i);
            DataFound d = new DataFound(attributeValue, classValue, i);
            int index = data.indexOf(d);
            // index == -1 means this value/class pair hasn't been seen yet
            if (index == -1) {
                data.add(d);
            } else {
                data.get(index).incrementCount();
            }
        }
    }
}
From source file: machinelearningq2.MachineLearningQ2.java

/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    // TODO code application logic here
    // paths for the training and test data
    String trainingDataPath = "datasets/diabetes/diabetes-train.arff";
    String testDataPath = "datasets/diabetes/diabetes-test.arff";
    /*
    String trainingDataPath = "datasets/crime.arff";
    String testDataPath = "datasets/crimeTest.arff";
    */
    // creating the instances
    Instances trainingData = getData(trainingDataPath);
    Instances testData = getData(testDataPath);
    /*
    BasicNaiveBayesV1 b = new BasicNaiveBayesV1(true);
    b.buildClassifier(trainingData);
    for (Instance x : testData) {
        b.classifyInstance(x);
    }
    */
    ExtendedNaiveBayes c = new ExtendedNaiveBayes(true, "d");
    c.buildClassifier(trainingData);
    testData.setClassIndex(testData.numAttributes() - 1);
    for (Instance x : testData) {
        c.classifyInstance(x);
    }
    c.getAccuracy();
}
From source file: machinelearning_cw.WekaLoader.java

public static Instances loadData(String fullPath) {
    Instances d = null;
    FileReader r;
    try {
        r = new FileReader(fullPath);
        d = new Instances(r);
        d.setClassIndex(d.numAttributes() - 1);
    } catch (IOException e) {
        System.out.println("Unable to load data on path " + fullPath + " Exception thrown =" + e);
        System.exit(0);
    }
    return d;
}
From source file: main.coba.java

public static void main(String[] args) throws Exception {
    BufferedReader breader = null;
    breader = new BufferedReader(new FileReader("src/main/Team.arff"));
    Instances inputTrain = new Instances(breader);
    inputTrain.setClassIndex(inputTrain.numAttributes() - 1);
    breader.close();
    FeedForwardNeuralNetworkAlgorithm FFNN = new FeedForwardNeuralNetworkAlgorithm(inputTrain);
    FFNN.buildModel(1, 5);
    FFNN.printModel();
    FFNN.printAllWeights();
    double[] arr = inputTrain.get(60).toDoubleArray();
    FFNN.setInputLayer(arr);
    FFNN.determineOutput(inputTrain.get(60));
    System.out.println(FFNN.getClassOutputValues());
    FFNN.updateModel(inputTrain.get(60));
    FFNN.printModel();
    FFNN.printAllWeights();
    System.out.println("Class : " + FFNN.getClassOutputValues());
    System.out.println("\nupdate again!!!!\n");
    FFNN.clearModel();
    arr = null;
    arr = inputTrain.get(61).toDoubleArray();
    FFNN.setInputLayer(arr);
    FFNN.determineOutput(inputTrain.get(61));
    FFNN.updateModel(inputTrain.get(61));
    FFNN.printModel();
    FFNN.printAllWeights();
    System.out.println("Class : " + FFNN.getClassOutputValues());
    System.out.println("\nupdate again!!!!\n");
    FFNN.clearModel();
    arr = null;
    arr = inputTrain.get(62).toDoubleArray();
    FFNN.setInputLayer(arr);
    FFNN.determineOutput(inputTrain.get(62));
    FFNN.updateModel(inputTrain.get(62));
    FFNN.printModel();
    FFNN.printAllWeights();
    System.out.println("Class : " + FFNN.getClassOutputValues());
}
From source file: main.mFFNN.java

public static void main(String[] args) throws Exception {
    mFFNN m = new mFFNN();
    BufferedReader breader = null;
    breader = new BufferedReader(new FileReader("src\\main\\iris.arff"));
    Instances fileTrain = new Instances(breader);
    fileTrain.setClassIndex(fileTrain.numAttributes() - 1);
    System.out.println(fileTrain);
    breader.close();
    System.out.println("mFFNN!!!\n\n");
    FeedForwardNeuralNetwork FFNN = new FeedForwardNeuralNetwork();
    Evaluation eval = new Evaluation(fileTrain);
    FFNN.buildClassifier(fileTrain);
    eval.evaluateModel(FFNN, fileTrain);
    // OUTPUT
    Scanner scan = new Scanner(System.in);
    System.out.println(eval.toSummaryString("=== Stratified cross-validation ===\n" + "=== Summary ===", true));
    System.out.println(eval.toClassDetailsString("=== Detailed Accuracy By Class ==="));
    System.out.println(eval.toMatrixString("===Confusion matrix==="));
    System.out.println(eval.fMeasure(1) + " " + eval.recall(1));
    System.out.println("\nDo you want to save this model(1/0)? ");
    FFNN.distributionForInstance(fileTrain.get(0));
    /*
    int c = scan.nextInt();
    if (c == 1) {
        System.out.print("Please enter your file name (*.model) : ");
        String infile = scan.next();
        m.saveModel(FFNN, infile);
    } else {
        System.out.print("Model not saved.");
    }
    */
}
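Note that the example above prints a stratified cross-validation header while actually evaluating on the training data. If real cross-validation is wanted, Weka's Evaluation supports it directly; a minimal sketch under that assumption, reusing FFNN and fileTrain from above:

// 10-fold stratified cross-validation on fileTrain instead of
// evaluating the model on its own training data.
Evaluation cvEval = new Evaluation(fileTrain);
cvEval.crossValidateModel(FFNN, fileTrain, 10, new java.util.Random(1));
System.out.println(cvEval.toSummaryString("=== Stratified cross-validation ===\n=== Summary ===", true));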
From source file: mao.datamining.DataSetPair.java

private void doItOnce4All() {
    if (didIt)
        return;
    didIt = true;
    try {
        // step 0: remove all those empty columns which have more than 50% missing values
        Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName);
        orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1);
        Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1);
        MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString());
        // step 0-1: remove all columns which have more than half missing values
        Instances newData = orangeDataSet;
        RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues();
        removeMissingValuesColumns.setM_maxMissingPercentage(50);
        removeMissingValuesColumns.setManualDeleteColumns(columns2Delete);
        removeMissingValuesColumns.setInputFormat(newData);
        newData = Filter.useFilter(newData, removeMissingValuesColumns);
        Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n"
                + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) {
            writer.write(newData.toString());
        }
        // step 0-2: delete those instances (rows) with more than half missing values
        BufferedReader reader70 = new BufferedReader(new InputStreamReader(
                new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")));
        BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff")));
        int columnNum = newData.numAttributes();
        int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0;
        String line = null;
        int missingColumnNum = 0;
        while ((line = reader70.readLine()) != null) {
            // count the '?' markers (missing values) in this raw ARFF line
            missingColumnNum = 0;
            for (int i = 0; i < line.length(); i++) {
                if (line.charAt(i) == '?')
                    missingColumnNum++;
            }
            if (missingColumnNum * 100 / columnNum < 50) {
                writerAfterDeleteRows.write(line);
                writerAfterDeleteRows.newLine();
            } else {
                System.out.println("Delete Row: [" + line + "]");
                if (line.endsWith("-1")) {
                    deleteM1Num++;
                } else {
                    delete1Num++;
                }
            }
        }
        System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num
                + ", delete class 1: " + delete1Num);
        reader70.close();
        writerAfterDeleteRows.close();
        // create sample files:
        createSampleDataSets();
    } catch (Exception e) {
        Main.logging(null, e);
    }
}
From source file: mao.datamining.DataSetPair.java

private void createSampleDataSets() {
    try {
        // reload the new data from the new arff file: Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff"
        Instances newData = ConverterUtils.DataSource
                .read(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff");
        newData.setClassIndex(newData.numAttributes() - 1);
        // create none sample file
        // Main.logging("== New Data After Doing Nothing, waiting for CostMatrix: ===\n" + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterNoneSampling.arff")))) {
            writer.write(newData.toString());
        }
        // create under sample file
        SpreadSubsample underSampleFilter = new weka.filters.supervised.instance.SpreadSubsample();
        underSampleFilter.setInputFormat(newData);
        String underOptionsClone[] = new String[underSampleFilterOptions.length];
        System.arraycopy(underSampleFilterOptions, 0, underOptionsClone, 0, underSampleFilterOptions.length);
        underSampleFilter.setOptions(underOptionsClone);
        Instances underNewData = Filter.useFilter(newData, underSampleFilter);
        // Main.logging("== New Data After Under Sampling: ===\n" + underNewData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterUnderSampling.arff")))) {
            writer.write(underNewData.toString());
        }
        // create over sample file
        // weka.filters.supervised.instance.SMOTE -C 0 -K 5 -P 1000.0 -S 1 (smoteOptions)
        SMOTE smote = new weka.filters.supervised.instance.SMOTE();
        smote.setInputFormat(newData);
        String overOptionsClone[] = new String[overSampleSmoteOptions.length];
        System.arraycopy(overSampleSmoteOptions, 0, overOptionsClone, 0, overSampleSmoteOptions.length);
        smote.setOptions(overOptionsClone);
        Instances overNewData = Filter.useFilter(newData, smote);
        // Main.logging("== New Data After Over Sampling: ===\n" + overNewData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterOverSampling.arff")))) {
            writer.write(overNewData.toString());
        }
    } catch (Exception ex) {
        Logger.getLogger(DataSetPair.class.getName()).log(Level.SEVERE, null, ex);
    }
}
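The SMOTE options quoted in the comment above could also be parsed from the documented option string instead of cloning a pre-built array; a minimal sketch of that variant, reusing newData from the method above (setting options before setInputFormat):

// Sketch: build the SMOTE filter from the documented option string.
// -K 5 uses 5 nearest neighbours; -P 1000.0 generates 1000% additional
// synthetic minority-class instances.
SMOTE smote = new weka.filters.supervised.instance.SMOTE();
smote.setOptions(weka.core.Utils.splitOptions("-C 0 -K 5 -P 1000.0 -S 1"));
smote.setInputFormat(newData);
Instances overNewData = Filter.useFilter(newData, smote);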
From source file: mao.datamining.DataSetPair.java

/**
 * Pre-process the training data set with:
 * the RemoveUselessColumnsByMissingValues filter,
 * a SpreadSubsample filter to shrink the majority-class instances, and
 * an AttributeSelection filter with CfsSubsetEval and LinearForwardSelection.
 */
private void processTrainRawData() {
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    finalTrainAttrList.clear();
    try {
        doItOnce4All();
        String sampleFilePath = null;
        // step 2: either over sample, or under sample
        // weka.filters.supervised.instance.SpreadSubsample
        if (this.resampleMethod.equalsIgnoreCase(resampleUnder)) {
            System.out.println("Under Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterUnderSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleOver)) {
            System.out.println("Over Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterOverSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleNone)) {
            // do nothing
            System.out.println("None Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleMatrix)) {
            // do nothing
            System.out.println("Matrix Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else {
            doNotSupport();
        }
        Instances newData = ConverterUtils.DataSource.read(sampleFilePath);
        newData.setClassIndex(newData.numAttributes() - 1);
        // Main.logging("== New Data After Resampling class instances: ===\n" + newData.toSummaryString());
        // Step 3: select features
        AttributeSelection attrSelectionFilter = new AttributeSelection();
        ASEvaluation eval = null;
        ASSearch search = null;
        // ranker
        if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionA)) {
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            eval = new weka.attributeSelection.InfoGainAttributeEval();
            // weka.attributeSelection.Ranker -T 0.02 -N -1
            search = new Ranker();
            String rankerOptios[] = { "-T", "0.01", "-N", "-1" };
            if (resampleMethod.equalsIgnoreCase(resampleOver)) {
                rankerOptios[1] = "0.1";
            }
            ((Ranker) search).setOptions(rankerOptios);
            Main.logging("== Start to Select Features with InfoGainAttributeEval and Ranker");
        }
        // weka.attributeSelection.LinearForwardSelection -D 0 -N 5 -I -K 50 -T 0
        else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionB)) {
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            eval = new CfsSubsetEval();
            search = new LinearForwardSelection();
            String linearOptios[] = { "-D", "0", "-N", "5", "-I", "-K", "50", "-T", "0" };
            ((LinearForwardSelection) search).setOptions(linearOptios);
            Main.logging("== Start to Select Features with CfsSubsetEval and LinearForwardSelection");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionNo)) {
            System.out.println("None Selection ssssssssssssssssssssssssssssssssssssss");
            Main.logging("No Feature Selection Method");
        } else {
            doNotSupport();
        }
        if (eval != null) {
            attrSelectionFilter.setEvaluator(eval);
            attrSelectionFilter.setSearch(search);
            attrSelectionFilter.setInputFormat(newData);
            newData = Filter.useFilter(newData, attrSelectionFilter);
        }
        Main.logging("== New Data After Selecting Features: ===\n" + newData.toSummaryString());
        // finally, write the final dataset to the file system
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.trainFileName)))) {
            writer.write(newData.toString());
        }
        int numAttributes = newData.numAttributes();
        for (int i = 0; i < numAttributes; i++) {
            String attrName = newData.attribute(i).name();
            finalTrainAttrList.add(attrName);
        }
        Main.logging(finalTrainAttrList.toString());
        // set the final train dataset
        finalTrainDataSet = newData;
        finalTrainDataSet.setClassIndex(finalTrainDataSet.numAttributes() - 1);
        Main.logging("train dataset class attr: " + finalTrainDataSet.classAttribute().toString());
    } catch (Exception ex) {
        Main.logging(null, ex);
    }
}