List of usage examples for weka.core Instances numAttributes
public int numAttributes()
From source file:c4.pkg5crossv.Classifier.java
public static void C45() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); //Ustawienie atrybutu decyzyjnego (ostatni atrybut) data.setClassIndex(data.numAttributes() - 1); //OPCJE:/*from ww w .j a v a 2 s .c o m*/ //-U -> budowa drzewa bez przycinania (ostre liscie) //-C -> <wspolczynnik dokladnosci> - ustawienie wspolczynnika dokladnosci dla lisci (default 0.25) //-M -> ustawienie minimalnej liczby obiektow w lisciu dla ktorej lisc nie jest dzielony (default 2) //Ustalenie opcji String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options); //Ustawienie opcji tree.buildClassifier(data); // Tworzenie klasyfikatora (drzewa) System.out.println(tree.toString()); //Wypisanie drzewa w formie tekstowej System.out.println("TRAIN&TEST"); trainAndTest(); }
From source file:c4.pkg5crossv.Classifier.java
/**
 * Loads the iris data set, shuffles it, splits it 60/40 into train/test
 * sets, builds an unpruned J48 tree on the training set, and prints a
 * 10-fold cross-validation summary computed over the test set.
 */
public static void trainAndTest() throws FileNotFoundException, IOException, Exception {
    Instances data = DataLoad.loadData("./src/data/irysy.arff");
    data.setClassIndex(data.numAttributes() - 1); // last attribute is the class
    // Random split of the data.
    // NOTE(review): unseeded Random makes every run non-reproducible —
    // consider a fixed seed if deterministic splits are wanted.
    data.randomize(new Random());
    double percent = 60.0; // share of instances used for training
    int trainSize = (int) Math.round(data.numInstances() * percent / 100);
    int testSize = data.numInstances() - trainSize;
    Instances trainData = new Instances(data, 0, trainSize);
    Instances testData = new Instances(data, trainSize, testSize);
    String[] options = Utils.splitOptions("-U -M 10"); // unpruned, min 10 instances per leaf
    J48 tree = new J48();
    tree.setOptions(options); // apply the options
    // NOTE(review): crossValidateModel() below rebuilds the classifier on each
    // fold of testData, so this buildClassifier() call does not influence the
    // printed results.
    tree.buildClassifier(trainData);
    Evaluation eval2 = new Evaluation(trainData);
    eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 10-fold CV (original comment incorrectly said 5)
    System.out.println(eval2.toSummaryString("Wyniki:", false)); // print cross-validation results
}
From source file:c4.pkg5crossv.Preview.java
/** * method to write arff data into s.o.p. * @throws IOException //from w ww. j av a 2 s .co m */ public static void showData() throws IOException { String source = MainWindow.browsedFileLabel.getText(); Instances data = DataLoad.loadData(source.replace("\\", "/")); data.setClassIndex(data.numAttributes() - 1); String field = ""; for (int i = 0; i < data.numAttributes(); i++) { // Print the current attribute. System.out.print(data.attribute(i).name() + ": "); previewTextArea.append("\n" + data.attribute(i).name() + ": "); // Print the values associated with the current attribute. double[] values = data.attributeToDoubleArray(i); System.out.println(Arrays.toString(values)); previewTextArea.append(Arrays.toString(values)); } }
From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java
License:Apache License
public void trainClassifier(Classifier classifier, FileReader trainingDataset, FileOutputStream trainingModel, Integer crossValidationFoldNumber) throws Exception { Instances instances = new Instances(new BufferedReader(trainingDataset)); switch (classifier) { case KNN://from w w w.ja va 2 s. c o m int K = (int) Math.ceil(Math.sqrt(instances.numInstances())); this.classifier = new IBk(K); break; case NB: this.classifier = new NaiveBayes(); } if (instances.classIndex() == -1) { instances.setClassIndex(instances.numAttributes() - 1); } this.classifier.buildClassifier(instances); if (crossValidationFoldNumber > 0) { Evaluation evaluation = new Evaluation(instances); evaluation.crossValidateModel(this.classifier, instances, crossValidationFoldNumber, new Random(1)); kappa = evaluation.kappa(); fMeasure = evaluation.weightedFMeasure(); confusionMatrix = evaluation.toMatrixString("Confusion matrix: "); } ObjectOutputStream outputStream = new ObjectOutputStream(trainingModel); outputStream.writeObject(this.classifier); outputStream.flush(); outputStream.close(); }
From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java
License:Apache License
/**
 * Deserializes a trained classifier and, if the last instance of the
 * testing set is labeled with the given username, classifies that
 * instance and returns its predicted class label keyed by username.
 *
 * @param username expected speaker label of the last test instance
 * @param trainingModel stream containing the serialized classifier
 * @param testingDataset reader over an ARFF testing set
 * @return map from username to predicted class label; empty if the last
 *         instance is not labeled with the given username
 * @throws Exception if deserialization, loading or classification fails
 */
public Map<String, String> makePrediction(String username, FileInputStream trainingModel, FileReader testingDataset) throws Exception {
    Map<String, String> predictions = new HashMap<>();
    weka.classifiers.Classifier classifier;
    // try-with-resources closes the stream even if readObject throws
    // (the original leaked the stream on failure).
    try (ObjectInputStream inputStream = new ObjectInputStream(trainingModel)) {
        classifier = (weka.classifiers.Classifier) inputStream.readObject();
    }
    Instances instances = new Instances(new BufferedReader(testingDataset));
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1); // default: last attribute is the class
    }
    int last = instances.numInstances() - 1;
    // Only predict when the last instance carries the expected label.
    if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) {
        double label = classifier.classifyInstance(instances.instance(last));
        instances.instance(last).setClassValue(label);
        predictions.put(username, instances.instance(last).stringValue(instances.classIndex()));
    }
    return predictions;
}
From source file:categorization.SpectralWEKA.java
License:Open Source License
/** * Generates a clusterer by the mean of spectral clustering algorithm. * * @param data set of instances serving as training data * @exception Exception if the clusterer has not been generated successfully *///from w ww. j av a 2 s . c o m public void buildClusterer(Instances data) throws java.lang.Exception { m_Sequences = new Instances(data); int n = data.numInstances(); int k = data.numAttributes(); DoubleMatrix2D w; if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n); else w = DoubleFactory2D.dense.make(n, n); double[][] v1 = new double[n][]; for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray(); v = DoubleFactory2D.dense.make(v1); double sigma_sq = sigma * sigma; //Sets up similarity matrix for (int i = 0; i < n; i++) for (int j = i; j < n; j++) { /*double dist = distnorm2(v.viewRow(i), v.viewRow(j)); if((r == -1) || (dist < r)) { double sim = Math.exp(- (dist * dist) / (2 * sigma_sq)); w.set(i, j, sim); w.set(j, i, sim); }*/ /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)}; System.out.println(key[0]); System.out.println(key[1]); System.out.println(simScoreMap.containsKey(key)); Double simValue = simScoreMap.get(key);*/ double sim = sim_matrix[i][j]; w.set(i, j, sim); w.set(j, i, sim); } //Partitions points int[][] p = partition(w, alpha_star); //Deploys results numOfClusters = p.length; cluster = new int[n]; for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i; //System.out.println("Final partition:"); // UtilsJS.printMatrix(p); // System.out.println("Cluster:\n"); // UtilsJS.printArray(cluster); this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1; // System.out.println("Num clusters:\t"+this.numOfClusters); }
From source file:cba.Apriori.java
License:Open Source License
/** * Removes columns that are all missing from the data * @param instances the instances/*from ww w. j av a 2s . c o m*/ * @return a new set of instances with all missing columns removed * @throws Exception if something goes wrong */ protected Instances removeMissingColumns(Instances instances) throws Exception { int numInstances = instances.numInstances(); StringBuffer deleteString = new StringBuffer(); int removeCount = 0; boolean first = true; int maxCount = 0; for (int i = 0; i < instances.numAttributes(); i++) { AttributeStats as = instances.attributeStats(i); if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) { // see if we can decrease this by looking for the most frequent value int[] counts = as.nominalCounts; if (counts[Utils.maxIndex(counts)] > maxCount) { maxCount = counts[Utils.maxIndex(counts)]; } } if (as.missingCount == numInstances) { if (first) { deleteString.append((i + 1)); first = false; } else { deleteString.append("," + (i + 1)); } removeCount++; } } if (m_verbose) { System.err.println("Removed : " + removeCount + " columns with all missing " + "values."); } if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) { m_upperBoundMinSupport = (double) maxCount / (double) numInstances; if (m_verbose) { System.err.println("Setting upper bound min support to : " + m_upperBoundMinSupport); } } if (deleteString.toString().length() > 0) { Remove af = new Remove(); af.setAttributeIndices(deleteString.toString()); af.setInvertSelection(false); af.setInputFormat(instances); Instances newInst = Filter.useFilter(instances, af); return newInst; } return instances; }
From source file:cba.Apriori.java
License:Open Source License
/**
 * Method that generates all large itemsets with a minimum support, and from
 * these all association rules with a minimum confidence.
 *
 * @param instances the instances to be used for generating the associations
 * @throws Exception if rules can't be built successfully
 */
public void buildAssociations(Instances instances) throws Exception {
    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport = 0;
    // Work on a copy so the caller's data is never mutated.
    instances = new Instances(instances);
    if (m_removeMissingCols) {
        instances = removeMissingColumns(instances);
    }
    if (m_car && m_metricType != CONFIDENCE)
        throw new Exception("For CAR-Mining metric type has to be confidence!");
    // only set class index if CAR is requested
    if (m_car) {
        if (m_classIndex == -1) {
            instances.setClassIndex(instances.numAttributes() - 1); // default: last attribute
        } else if (m_classIndex <= instances.numAttributes() && m_classIndex > 0) {
            instances.setClassIndex(m_classIndex - 1); // m_classIndex is 1-based
        } else {
            throw new Exception("Invalid class index.");
        }
    }
    // can associator handle the data?
    getCapabilities().testWithFail(instances);
    m_cycles = 0;
    if (m_car) {
        //m_instances does not contain the class attribute
        m_instances = LabeledItemSet.divide(instances, false);
        //m_onlyClass contains only the class attribute
        m_onlyClass = LabeledItemSet.divide(instances, true);
    } else
        m_instances = instances;
    if (m_car && m_numRules == Integer.MAX_VALUE) {
        // Set desired minimum support
        m_minSupport = m_lowerBoundMinSupport;
    } else {
        // Decrease minimum support until desired number of rules found.
        m_minSupport = m_upperBoundMinSupport - m_delta;
        m_minSupport = (m_minSupport < m_lowerBoundMinSupport) ? m_lowerBoundMinSupport : m_minSupport;
    }
    do {
        // Reserve space for variables.
        // m_allTheRules slots: [0]=premises, [1]=consequences, [2]=confidence,
        // [3..5] extra metric values (only used for non-confidence metrics).
        m_Ls = new FastVector();
        m_hashtables = new FastVector();
        m_allTheRules = new FastVector[6];
        m_allTheRules[0] = new FastVector();
        m_allTheRules[1] = new FastVector();
        m_allTheRules[2] = new FastVector();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3] = new FastVector();
            m_allTheRules[4] = new FastVector();
            m_allTheRules[5] = new FastVector();
        }
        sortedRuleSet = new FastVector[6];
        sortedRuleSet[0] = new FastVector();
        sortedRuleSet[1] = new FastVector();
        sortedRuleSet[2] = new FastVector();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            sortedRuleSet[3] = new FastVector();
            sortedRuleSet[4] = new FastVector();
            sortedRuleSet[5] = new FastVector();
        }
        if (!m_car) {
            // Find large itemsets and rules
            findLargeItemSets();
            if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
                findRulesBruteForce();
            else
                findRulesQuickly();
        } else {
            findLargeCarItemSets();
            findCarRulesQuickly();
        }
        // Sort rules according to their support
        /* supports = new double[m_allTheRules[2].size()]; for (int i = 0; i < m_allTheRules[2].size(); i++) supports[i] = (double)((AprioriItemSet)m_allTheRules[1].elementAt(i)).support(); indices = Utils.stableSort(supports); for (int i = 0; i < m_allTheRules[2].size(); i++) { sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i])); sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i])); sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i])); if (m_metricType != CONFIDENCE || m_significanceLevel != -1) { sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[i])); sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[i])); sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[i])); } }*/
        int j = m_allTheRules[2].size() - 1;
        // Negate the supports so the ascending stable sort yields descending support order.
        supports = new double[m_allTheRules[2].size()];
        for (int i = 0; i < (j + 1); i++)
            supports[j - i] = ((double) ((ItemSet) m_allTheRules[1].elementAt(j - i)).support()) * (-1);
        indices = Utils.stableSort(supports);
        for (int i = 0; i < (j + 1); i++) {
            sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j - i]));
            sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j - i]));
            sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j - i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j - i]));
                sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j - i]));
                sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j - i]));
            }
        }
        // Sort rules according to their confidence
        m_allTheRules[0].removeAllElements();
        m_allTheRules[1].removeAllElements();
        m_allTheRules[2].removeAllElements();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3].removeAllElements();
            m_allTheRules[4].removeAllElements();
            m_allTheRules[5].removeAllElements();
        }
        confidences = new double[sortedRuleSet[2].size()];
        int sortType = 2 + m_metricType; // slot holding the metric selected for ranking
        for (int i = 0; i < sortedRuleSet[2].size(); i++)
            confidences[i] = ((Double) sortedRuleSet[sortType].elementAt(i)).doubleValue();
        indices = Utils.stableSort(confidences);
        // Keep only the top m_numRules rules, highest metric value first.
        for (int i = sortedRuleSet[0].size() - 1; (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
            m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
            m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
            m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
                m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
                m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
            }
        }
        if (m_verbose) {
            if (m_Ls.size() > 1) {
                System.out.println(toString());
            }
        }
        // Lower the minimum support for the next pass, clamping at the lower bound.
        if (m_minSupport == m_lowerBoundMinSupport || m_minSupport - m_delta > m_lowerBoundMinSupport)
            m_minSupport -= m_delta;
        else
            m_minSupport = m_lowerBoundMinSupport;
        // Absolute instance count corresponding to the current minimum support.
        necSupport = Math.round((float) ((m_minSupport * (double) m_instances.numInstances()) + 0.5));
        m_cycles++;
    } while ((m_allTheRules[0].size() < m_numRules) && (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
            /* (necSupport >= lowerBoundNumInstancesSupport)*/
            /* (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)) */
            && (necSupport >= 1));
    // Undo the final decrement so m_minSupport reflects the support actually used.
    m_minSupport += m_delta;
}
From source file:cba.AprioriItemSet.java
License:Open Source License
/**
 * Converts the header info of the given set of instances into a set
 * of item sets (singletons). The ordering of values in the header file
 * determines the lexicographic order.
 *
 * @param instances the set of instances whose header info is to be used
 * @param treatZeroAsMissing if true, value index 0 of every attribute is
 *        treated as missing and gets no singleton
 * @return a set of item sets, each containing a single item
 * @exception Exception if singletons can't be generated successfully
 */
public static FastVector singletons(Instances instances, boolean treatZeroAsMissing) throws Exception {
    FastVector setOfItemSets = new FastVector();
    ItemSet current;
    for (int i = 0; i < instances.numAttributes(); i++) {
        if (instances.attribute(i).isNumeric())
            throw new Exception("Can't handle numeric attributes!");
        // Skip value 0 when zeros are to be treated as missing.
        int j = (treatZeroAsMissing) ? 1 : 0;
        for (; j < instances.attribute(i).numValues(); j++) {
            current = new AprioriItemSet(instances.numInstances());
            current.setTreatZeroAsMissing(treatZeroAsMissing);
            // -1 marks "attribute not part of this item set"; only slot i is set.
            current.m_items = new int[instances.numAttributes()];
            java.util.Arrays.fill(current.m_items, -1);
            current.m_items[i] = j;
            setOfItemSets.addElement(current);
        }
    }
    return setOfItemSets;
}
From source file:cba.ItemSet.java
License:Open Source License
/** * Converts the header info of the given set of instances into a set * of item sets (singletons). The ordering of values in the header file * determines the lexicographic order./*from www . j a v a 2 s .co m*/ * * @param instances the set of instances whose header info is to be used * @return a set of item sets, each containing a single item * @exception Exception if singletons can't be generated successfully */ public static FastVector singletons(Instances instances) throws Exception { FastVector setOfItemSets = new FastVector(); ItemSet current; for (int i = 0; i < instances.numAttributes(); i++) { if (instances.attribute(i).isNumeric()) throw new Exception("Can't handle numeric attributes!"); for (int j = 0; j < instances.attribute(i).numValues(); j++) { current = new ItemSet(instances.numInstances()); current.m_items = new int[instances.numAttributes()]; for (int k = 0; k < instances.numAttributes(); k++) current.m_items[k] = -1; current.m_items[i] = j; setOfItemSets.addElement(current); } } return setOfItemSets; }