List of usage examples for weka.core Instances numAttributes
public int numAttributes()
From source file:farm_ads.MyClassifier.java
/**
 * Loads a dataset from the given location and, when no class attribute has
 * been set yet, designates the last attribute as the class.
 *
 * @param URL path or URL understood by Weka's DataSource
 * @return the loaded Instances with a class index set
 * @throws Exception if the data source cannot be read
 */
public Instances readIntancesVecto(String URL) throws Exception {
    DataSource source = new DataSource(URL);
    Instances loaded = source.getDataSet();
    boolean classUnset = (loaded.classIndex() == -1);
    if (classUnset) {
        // Weka convention: class attribute is the last one.
        loaded.setClassIndex(loaded.numAttributes() - 1);
    }
    return loaded;
}
From source file:farm_ads.MyClassifier.java
public String ClassifyInstance(Classifier c, String instance) throws Exception { String format = "%4s %15s %15s\n"; FarmAds fa = new FarmAds(instance, 1); FarmAdsVector fav = new FarmAdsVector(); fav.writeFile("data\\dataVecto.dat", fa); DataSource source = new DataSource("data\\dataVecto.dat"); Instances instances = source.getDataSet(); if (instances.classIndex() == -1) { instances.setClassIndex(instances.numAttributes() - 1); }// w w w . jav a2s .c om String s = new String(); s += "======= Kt qu d on qung co========\n"; s += String.format(format, "STT", "Trc d on", "Sau d on"); String[] classAds = { "Ph hp", "Khng Ph Hp" }; double actValue = instances.firstInstance().classValue(); Instance newInst = instances.firstInstance(); double pred = c.classifyInstance(newInst); s += String.format(format, Integer.toString(1), classAds[(int) actValue], classAds[(int) pred]); if (actValue == pred) { s += "\n\n ==> D on ng"; } else { s += "\n\n ==> D on sai"; } return s; }
From source file:feature.InfoGainEval.java
License:Open Source License
/**
 * Computes the information gain of a single attribute with respect to the
 * class. Numeric attributes are discretized (or binarized when m_Binarize
 * is set) before counting.
 *
 * NOTE(review): this duplicates almost all of buildEvaluator(); the full
 * contingency table for every attribute is built even though only
 * counts[att] is used at the end — candidate for refactoring.
 *
 * @param data set of instances serving as training data
 * @param att  index of the attribute to evaluate
 * @return the information gain of attribute att
 * @throws Exception if the evaluator cannot handle the data
 */
public double computeInfoGain(Instances data, int att) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    // Make all non-class attributes discrete so value counts are finite.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();
    // Reserve space and initialize counters. The extra row/column
    // (numValues + 1, numClasses + 1) accumulates missing-value weight.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }
    // Initialize counters: per-class weight totals, missing class in the
    // last slot.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Seed row 0 (sparse "value 0") with the totals; explicit values are
    // moved out of row 0 below.
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }
    // Get counts. Sparse iteration: only explicitly stored values appear;
    // each explicit value is added to its own row and subtracted from row 0.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // value present, class missing
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // value missing, class present
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // both missing
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }
    // distribute missing counts if required: spread the missing-value row
    // and missing-class column over the table proportionally to marginals.
    if (m_missing_merge) {
        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();
                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];
                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }
                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }
                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }
                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }
    // Compute info gains. Only slot att is filled here; the rest of the
    // array stays 0.
    m_InfoGains = new double[data.numAttributes()];
    m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att])
            - ContingencyTables.entropyConditionedOnRows(counts[att]));
    return m_InfoGains[att];
}
From source file:feature.InfoGainEval.java
License:Open Source License
/**
 * Initializes the information gain evaluator: discretizes (or binarizes)
 * all numeric attributes, builds per-attribute contingency tables against
 * the class, optionally redistributes missing-value counts, and stores the
 * resulting information gain of every attribute in m_InfoGains.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator cannot handle the data
 */
public void buildEvaluator(Instances data) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    // Make all non-class attributes discrete so value counts are finite.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();
    // Reserve space and initialize counters. The extra row/column holds
    // missing-value / missing-class weight.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }
    // Initialize counters: per-class weight totals, missing class in the
    // last slot.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Seed row 0 (sparse "value 0") with the totals; explicit values are
    // moved out of row 0 below.
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }
    // Get counts. Sparse iteration: each explicitly stored value is added
    // to its own row and subtracted from row 0.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // value present, class missing
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // value missing, class present
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // both missing
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }
    // distribute missing counts if required: spread the missing-value row
    // and missing-class column over the table proportionally to marginals.
    if (m_missing_merge) {
        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();
                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];
                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }
                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }
                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }
                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }
    // Compute info gains for every non-class attribute.
    m_InfoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i])
                    - ContingencyTables.entropyConditionedOnRows(counts[i]));
        }
    }
}
From source file:FeatureSelection.ReliefFAttributeEval.java
License:Open Source License
public Instances createReliefInput(ArrayList<ArrayList<Double>> dataset, String[] featureNames_Arr, ArrayList<Double> labels) { this.featureNames_Arr = featureNames_Arr; // create attributes FastVector fv = new FastVector(); for (int i = 0; i <= featureNames_Arr.length; i++) { if (i == featureNames_Arr.length) { fv.addElement(new Attribute("@@class@@")); continue; }/*w w w .j a va2s . co m*/ fv.addElement(new Attribute(featureNames_Arr[i])); } // transform dataset so that each line represents each window - add // class label as well ArrayList<ArrayList<Double>> ReliefInput = new ArrayList<ArrayList<Double>>(); for (int i = 0; i < dataset.get(0).size(); i++) { ArrayList<Double> featT = new ArrayList<Double>(); for (int j = 0; j < dataset.size(); j++) { featT.add(dataset.get(j).get(i)); } featT.add(labels.get(i)); ReliefInput.add(featT); } // transform dataset into Instances type Instances ReliefInstances = new Instances("Features", fv, dataset.size()); for (int i = 0; i < ReliefInput.size(); i++) { double[] vals = CollectionUtilities.listToArray(ReliefInput.get(i)); Instance instWeka = new Instance(vals.length); for (int j = 0; j < vals.length; j++) { instWeka.setValue(j, vals[j]); } ReliefInstances.add(instWeka); } ReliefInstances.setClassIndex(ReliefInstances.numAttributes() - 1); return ReliefInstances; }
From source file:feature_construction.GeneticProgramming.java
License:LGPL
public static double[][] convertInstancesToInputFeaturesArray(String fileName) { // Create instances (file that contains the inputs to feed through the program) double[][] inputFeatures; try {//from w w w. jav a2 s .co m //load CSV CSVLoader loaderInputs = new CSVLoader(); loaderInputs.setSource(new File(fileName)); Instances inputSet = loaderInputs.getDataSet(); inputSet.setClassIndex(inputSet.numAttributes() - 1); inputFeatures = new double[inputSet.numInstances()][inputSet.numAttributes()]; // Convert instances to double[][] for (int i = 0; i < inputSet.numInstances(); i++) { for (int j = 0; j < inputSet.numAttributes(); j++) { inputFeatures[i][j] = inputSet.get(i).value(j); } } return inputFeatures; } catch (Exception e) { e.printStackTrace(); } return null; }
From source file:ffnn.FFNN.java
public static Instances preprocess(Instances i) { try {// www .j a v a2 s . co m Reorder rfilter = new Reorder(); int classIdx = i.classIndex() + 1; String order; if (classIdx != 1) { order = "1"; for (int j = 2; j <= i.numAttributes(); j++) { if (j != classIdx) { order = order + "," + j; } } } else { order = "2"; for (int j = 3; j <= i.numAttributes(); j++) { order = order + "," + j; } } order = order + "," + classIdx; rfilter.setAttributeIndices(order); rfilter.setInputFormat(i); i = Filter.useFilter(i, rfilter); StringToNominal stnfilter = new StringToNominal(); stnfilter.setAttributeRange("first-last"); stnfilter.setInputFormat(i); i = Filter.useFilter(i, stnfilter); NominalToBinary ntbfilter = new NominalToBinary(); ntbfilter.setInputFormat(i); i = Filter.useFilter(i, ntbfilter); Normalize nfilter = new Normalize(); nfilter.setInputFormat(i); i = Filter.useFilter(i, nfilter); } catch (Exception e) { System.out.println(e.toString()); } return i; }
From source file:ffnn.FFNN.java
/**
 * Interactive driver: loads or creates an FFNNTubesAI model, reads the
 * Team.arff dataset from a hard-coded path, asks the user for the class
 * index, preprocesses the data, optionally trains, then evaluates either
 * on the training set or with 10-fold cross-validation.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) throws Exception {
    FFNNTubesAI cls;
    Scanner scan = new Scanner(System.in);
    // "n" builds a fresh model, anything else deserializes a saved one.
    System.out.print("new / read? (n/r)");
    if (scan.next().equals("n")) {
        cls = new FFNNTubesAI();
    } else {
        cls = (FFNNTubesAI) TucilWeka.readModel();
    }
    int temp;
    // NOTE(review): dataset path is hard-coded to a local Weka install.
    Instances data = TucilWeka.readDataSet("C:\\Program Files\\Weka-3-8\\data\\Team.arff");
    // Show the attribute options so the user can pick the class index.
    for (int i = 0; i < data.numAttributes(); i++) {
        System.out.println(i + ". " + data.attribute(i));
    }
    System.out.print("Class Index : ");
    temp = scan.nextInt();
    data.setClassIndex(temp);
    data = preprocess(data);
    System.out.println(data);
    System.out.print("full train? (y/n)");
    if (scan.next().equals("y")) {
        try {
            cls.buildClassifier(data);
        } catch (Exception ex) {
            Logger.getLogger(FFNNTubesAI.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    int fold = 10;
    Evaluation eval = new Evaluation(data);
    // "e" evaluates on the training data itself, otherwise cross-validate
    // with a fixed seed for reproducibility.
    System.out.print("eval/10-fold? (e/f)");
    if (scan.next().equals("e")) {
        eval.evaluateModel(cls, data);
    } else {
        eval.crossValidateModel(cls, data, fold, new Random(1));
    }
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
    System.out.println(eval.toClassDetailsString());
}
From source file:ffnn.FFNNTubesAI.java
/**
 * Trains the feed-forward network with online back-propagation.
 *
 * Binary class problems get a second "dummy" output attribute so the class
 * is one-hot encoded across two outputs. The hidden-layer size is read
 * interactively from stdin. Training runs up to 50000 epochs; every ~2
 * seconds the model is evaluated on the training data, serialized when it
 * improves, and restarted with randomized hyperparameters after a long
 * stretch without improvement (at most 10 restarts).
 *
 * @param i training data with class index set
 * @throws Exception if filtering, evaluation or serialization fails
 */
@Override
public void buildClassifier(Instances i) throws Exception {
    Instance temp_instance = null;
    RealMatrix error_output;
    RealMatrix error_hidden;
    RealMatrix input_matrix;
    RealMatrix hidden_matrix;
    RealMatrix output_matrix;
    Instances temp_instances;
    int r = 0; // restart counter; training aborts after 10 restarts
    Scanner scan = new Scanner(System.in);
    output_layer = i.numDistinctValues(i.classIndex());
    temp_instances = filterNominalNumeric(i);
    // Two-class problems: add a dummy attribute and one-hot encode the
    // class across the last two attributes.
    if (output_layer == 2) {
        Add filter = new Add();
        filter.setAttributeIndex("last");
        filter.setAttributeName("dummy");
        filter.setInputFormat(temp_instances);
        temp_instances = Filter.useFilter(temp_instances, filter);
        for (int j = 0; j < temp_instances.numInstances(); j++) {
            if (temp_instances.instance(j).value(temp_instances.numAttributes() - 2) == 0) {
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 2, 1);
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 1, 0);
            } else {
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 2, 0);
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 1, 1);
            }
        }
    }
    // The last output_layer attributes are targets; the rest are inputs.
    input_layer = temp_instances.numAttributes() - output_layer;
    hidden_layer = 0;
    while (hidden_layer < 1) {
        System.out.print("Hidden layer : ");
        hidden_layer = scan.nextInt();
    }
    int init_hidden = hidden_layer;
    error_hidden = new BlockRealMatrix(1, hidden_layer);
    error_output = new BlockRealMatrix(1, output_layer);
    input_matrix = new BlockRealMatrix(1, input_layer + 1); // +1 for the bias input
    buildWeight(input_layer, hidden_layer, output_layer);
    long last_time = System.nanoTime();
    double last_error_rate = 1;
    double best_error_rate = 1;
    double last_update = System.nanoTime();
    // Epoch loop (bounded at 50000 iterations).
    for (long itr = 0; itr < 50000; ++itr) {
        if (r == 10) {
            break;
        }
        long time = System.nanoTime();
        // Roughly every 2 seconds: evaluate, checkpoint on improvement,
        // restart with new hyperparameters after a long stall.
        if (time - last_time > 2000000000) {
            Evaluation eval = new Evaluation(i);
            eval.evaluateModel(this, i);
            double accry = eval.correct() / eval.numInstances();
            if (eval.errorRate() < last_error_rate) {
                last_update = System.nanoTime();
                if (eval.errorRate() < best_error_rate)
                    SerializationHelper.write(accry + "-" + time + ".model", this);
            }
            if (accry > 0)
                last_error_rate = eval.errorRate();
            // Long stretch without improvement: restart with random
            // learning rate and hidden size. (Comment said "2 minute",
            // the constant is 30 seconds — left as-is.)
            if (time - last_update > 30000000000L) {
                last_update = System.nanoTime();
                learning_rate = random() * 0.05;
                hidden_layer = (int) (10 + floor(random() * 15));
                hidden_layer = (int) floor((hidden_layer / 25) * init_hidden);
                if (hidden_layer == 0) {
                    hidden_layer = 1;
                }
                itr = 0;
                System.out.println("RESTART " + learning_rate + " " + hidden_layer);
                buildWeight(input_layer, hidden_layer, output_layer);
                r++;
            }
            System.out.println(accry + " " + itr);
            last_time = time;
        }
        // One pass over all training instances.
        for (int j = 0; j < temp_instances.numInstances(); j++) {
            // Forward pass.
            temp_instance = temp_instances.instance(j);
            for (int k = 0; k < input_layer; k++) {
                input_matrix.setEntry(0, k, temp_instance.value(k));
            }
            input_matrix.setEntry(0, input_layer, 1.0); // bias
            hidden_matrix = input_matrix.multiply(weight1);
            for (int y = 0; y < hidden_layer; ++y) {
                hidden_matrix.setEntry(0, y, sig(hidden_matrix.getEntry(0, y)));
            }
            output_matrix = hidden_matrix.multiply(weight2).add(bias2);
            for (int y = 0; y < output_layer; ++y) {
                output_matrix.setEntry(0, y, sig(output_matrix.getEntry(0, y)));
            }
            // Backward pass: output-layer deltas (sigmoid derivative).
            double total_err = 0;
            for (int k = 0; k < output_layer; k++) {
                double o = output_matrix.getEntry(0, k);
                double t = temp_instance.value(input_layer + k);
                double err = o * (1 - o) * (t - o);
                total_err += err * err;
                error_output.setEntry(0, k, err);
            }
            // Update hidden->output weights.
            for (int y = 0; y < hidden_layer; y++) {
                for (int x = 0; x < output_layer; ++x) {
                    double wold = weight2.getEntry(y, x);
                    double correction = learning_rate * error_output.getEntry(0, x)
                            * hidden_matrix.getEntry(0, y);
                    weight2.setEntry(y, x, wold + correction);
                }
            }
            // Update output biases (input treated as 1).
            for (int x = 0; x < output_layer; ++x) {
                double correction = learning_rate * error_output.getEntry(0, x);
                bias2.setEntry(0, x, bias2.getEntry(0, x) + correction);
            }
            // Hidden-layer deltas back-propagated through weight2.
            for (int k = 0; k < hidden_layer; ++k) {
                double o = hidden_matrix.getEntry(0, k);
                double t = 0;
                for (int x = 0; x < output_layer; ++x) {
                    t += error_output.getEntry(0, x) * weight2.getEntry(k, x);
                }
                double err = o * (1 - o) * t;
                error_hidden.setEntry(0, k, err);
            }
            // Update input->hidden weights (includes the bias row).
            for (int y = 0; y < input_layer + 1; ++y) {
                for (int x = 0; x < hidden_layer; ++x) {
                    double wold = weight1.getEntry(y, x);
                    double correction = learning_rate * error_hidden.getEntry(0, x)
                            * input_matrix.getEntry(0, y);
                    weight1.setEntry(y, x, wold + correction);
                }
            }
        }
    }
}
From source file:ffnn.MultilayerPerceptron.java
License:Open Source License
/**
 * This function sets what the m_numeric flag to represent the passed class it
 * also performs the normalization of the attributes if applicable and sets up
 * the info to normalize the class. (note that regardless of the options it
 * will fill an array with the range and base, set to normalize all attributes
 * and the class to be between -1 and 1)
 *
 * @param inst the instances.
 * @return The modified instances. This needs to be done. If the attributes
 *         are normalized then deep copies will be made of all the instances
 *         which will need to be passed back out.
 */
private Instances setClassType(Instances inst) throws Exception {
    if (inst != null) {
        // x bounds: per-attribute half-range and midpoint, computed over
        // non-missing values only.
        m_attributeRanges = new double[inst.numAttributes()];
        m_attributeBases = new double[inst.numAttributes()];
        for (int noa = 0; noa < inst.numAttributes(); noa++) {
            double min = Double.POSITIVE_INFINITY;
            double max = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < inst.numInstances(); i++) {
                if (!inst.instance(i).isMissing(noa)) {
                    double value = inst.instance(i).value(noa);
                    if (value < min) {
                        min = value;
                    }
                    if (value > max) {
                        max = value;
                    }
                }
            }
            m_attributeRanges[noa] = (max - min) / 2;
            m_attributeBases[noa] = (max + min) / 2;
        }
        if (m_normalizeAttributes) {
            // Rewrite every instance with attributes mapped to [-1, 1];
            // the class attribute is copied through unchanged.
            for (int i = 0; i < inst.numInstances(); i++) {
                Instance currentInstance = inst.instance(i);
                double[] instance = new double[inst.numAttributes()];
                for (int noa = 0; noa < inst.numAttributes(); noa++) {
                    if (noa != inst.classIndex()) {
                        if (m_attributeRanges[noa] != 0) {
                            instance[noa] = (currentInstance.value(noa) - m_attributeBases[noa])
                                    / m_attributeRanges[noa];
                        } else {
                            // Constant attribute: only center it to avoid
                            // division by zero.
                            instance[noa] = currentInstance.value(noa) - m_attributeBases[noa];
                        }
                    } else {
                        instance[noa] = currentInstance.value(noa);
                    }
                }
                inst.set(i, new DenseInstance(currentInstance.weight(), instance));
            }
        }
        if (inst.classAttribute().isNumeric()) {
            m_numeric = true;
        } else {
            m_numeric = false;
        }
    }
    return inst;
}