List of usage examples for the weka.core.Instances method numInstances
public int numInstances()
From source file:c4.pkg5crossv.Classifier.java
public static void trainAndTest() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); data.setClassIndex(data.numAttributes() - 1); //Losowy podzial tablicy data.randomize(new Random()); double percent = 60.0; int trainSize = (int) Math.round(data.numInstances() * percent / 100); int testSize = data.numInstances() - trainSize; Instances trainData = new Instances(data, 0, trainSize); Instances testData = new Instances(data, trainSize, testSize); String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options);//from w ww . j a va2s .co m tree.buildClassifier(trainData); Evaluation eval2 = new Evaluation(trainData); eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 5 - fold System.out.println(eval2.toSummaryString("Wyniki:", false)); //Wypisanie testovania cross validation }
From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java
License:Apache License
public void trainClassifier(Classifier classifier, FileReader trainingDataset, FileOutputStream trainingModel, Integer crossValidationFoldNumber) throws Exception { Instances instances = new Instances(new BufferedReader(trainingDataset)); switch (classifier) { case KNN://from ww w . j av a2 s . com int K = (int) Math.ceil(Math.sqrt(instances.numInstances())); this.classifier = new IBk(K); break; case NB: this.classifier = new NaiveBayes(); } if (instances.classIndex() == -1) { instances.setClassIndex(instances.numAttributes() - 1); } this.classifier.buildClassifier(instances); if (crossValidationFoldNumber > 0) { Evaluation evaluation = new Evaluation(instances); evaluation.crossValidateModel(this.classifier, instances, crossValidationFoldNumber, new Random(1)); kappa = evaluation.kappa(); fMeasure = evaluation.weightedFMeasure(); confusionMatrix = evaluation.toMatrixString("Confusion matrix: "); } ObjectOutputStream outputStream = new ObjectOutputStream(trainingModel); outputStream.writeObject(this.classifier); outputStream.flush(); outputStream.close(); }
From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java
License:Apache License
/**
 * Loads a serialized classifier and, if the last instance of the testing data set carries
 * {@code username} as its class value, classifies that instance and reports the predicted
 * class value.
 *
 * <p>If the data set has no class index, the last attribute is used as the class.
 *
 * @param username class value the last instance must carry for a prediction to be made
 * @param trainingModel stream holding the serialized classifier; it is closed by this method
 * @param testingDataset reader over the testing data; it is not closed by this method
 * @return a map with the single entry {@code username -> predicted class value}, or an empty
 *         map when the data set is empty or its last instance belongs to another class value
 * @throws Exception if the model or the data cannot be read, or classification fails
 */
public Map<String, String> makePrediction(String username, FileInputStream trainingModel,
        FileReader testingDataset) throws Exception {
    Map<String, String> predictions = new HashMap<>();

    // NOTE(review): Java-native deserialization must only be used on trusted model files.
    weka.classifiers.Classifier classifier;
    try (ObjectInputStream inputStream = new ObjectInputStream(trainingModel)) {
        // try-with-resources closes the stream even when readObject fails.
        classifier = (weka.classifiers.Classifier) inputStream.readObject();
    }

    Instances instances = new Instances(new BufferedReader(testingDataset));

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    // Without any instance there is nothing to classify; avoid indexing instance -1.
    if (instances.numInstances() == 0) {
        return predictions;
    }

    int last = instances.numInstances() - 1;

    if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) {
        double label = classifier.classifyInstance(instances.instance(last));
        // Store the prediction on the instance so it can be read back as a string value.
        instances.instance(last).setClassValue(label);
        predictions.put(username, instances.instance(last).stringValue(instances.classIndex()));
    }

    return predictions;
}
From source file:categorization.SpectralWEKA.java
License:Open Source License
/** * Generates a clusterer by the mean of spectral clustering algorithm. * * @param data set of instances serving as training data * @exception Exception if the clusterer has not been generated successfully *//* w ww . j av a2s .c o m*/ public void buildClusterer(Instances data) throws java.lang.Exception { m_Sequences = new Instances(data); int n = data.numInstances(); int k = data.numAttributes(); DoubleMatrix2D w; if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n); else w = DoubleFactory2D.dense.make(n, n); double[][] v1 = new double[n][]; for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray(); v = DoubleFactory2D.dense.make(v1); double sigma_sq = sigma * sigma; //Sets up similarity matrix for (int i = 0; i < n; i++) for (int j = i; j < n; j++) { /*double dist = distnorm2(v.viewRow(i), v.viewRow(j)); if((r == -1) || (dist < r)) { double sim = Math.exp(- (dist * dist) / (2 * sigma_sq)); w.set(i, j, sim); w.set(j, i, sim); }*/ /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)}; System.out.println(key[0]); System.out.println(key[1]); System.out.println(simScoreMap.containsKey(key)); Double simValue = simScoreMap.get(key);*/ double sim = sim_matrix[i][j]; w.set(i, j, sim); w.set(j, i, sim); } //Partitions points int[][] p = partition(w, alpha_star); //Deploys results numOfClusters = p.length; cluster = new int[n]; for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i; //System.out.println("Final partition:"); // UtilsJS.printMatrix(p); // System.out.println("Cluster:\n"); // UtilsJS.printArray(cluster); this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1; // System.out.println("Num clusters:\t"+this.numOfClusters); }
From source file:cba.Apriori.java
License:Open Source License
/** * Removes columns that are all missing from the data * @param instances the instances/*from w w w . ja v a 2s. c om*/ * @return a new set of instances with all missing columns removed * @throws Exception if something goes wrong */ protected Instances removeMissingColumns(Instances instances) throws Exception { int numInstances = instances.numInstances(); StringBuffer deleteString = new StringBuffer(); int removeCount = 0; boolean first = true; int maxCount = 0; for (int i = 0; i < instances.numAttributes(); i++) { AttributeStats as = instances.attributeStats(i); if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) { // see if we can decrease this by looking for the most frequent value int[] counts = as.nominalCounts; if (counts[Utils.maxIndex(counts)] > maxCount) { maxCount = counts[Utils.maxIndex(counts)]; } } if (as.missingCount == numInstances) { if (first) { deleteString.append((i + 1)); first = false; } else { deleteString.append("," + (i + 1)); } removeCount++; } } if (m_verbose) { System.err.println("Removed : " + removeCount + " columns with all missing " + "values."); } if (m_upperBoundMinSupport == 1.0 && maxCount != numInstances) { m_upperBoundMinSupport = (double) maxCount / (double) numInstances; if (m_verbose) { System.err.println("Setting upper bound min support to : " + m_upperBoundMinSupport); } } if (deleteString.toString().length() > 0) { Remove af = new Remove(); af.setAttributeIndices(deleteString.toString()); af.setInvertSelection(false); af.setInputFormat(instances); Instances newInst = Filter.useFilter(instances, af); return newInst; } return instances; }
From source file:cba.AprioriItemSet.java
License:Open Source License
/**
 * Turns the header information of the given instances into single-item item sets, one
 * per attribute value. Item sets are produced attribute by attribute and, within an
 * attribute, in the order of its values, which fixes the lexicographic order.
 *
 * @param instances the set of instances whose header info is to be used
 * @param treatZeroAsMissing when true, the value at index 0 of every attribute is skipped
 *        and the flag is passed on to each created item set
 * @return a set of item sets, each containing a single item
 * @exception Exception if a numeric attribute is encountered
 */
public static FastVector singletons(Instances instances, boolean treatZeroAsMissing) throws Exception {
    FastVector result = new FastVector();
    final int firstValue = treatZeroAsMissing ? 1 : 0;

    for (int att = 0; att < instances.numAttributes(); att++) {
        if (instances.attribute(att).isNumeric()) {
            throw new Exception("Can't handle numeric attributes!");
        }

        for (int value = firstValue; value < instances.attribute(att).numValues(); value++) {
            ItemSet single = new AprioriItemSet(instances.numInstances());
            single.setTreatZeroAsMissing(treatZeroAsMissing);

            // -1 marks "no item" for every attribute except the current one.
            int[] items = new int[instances.numAttributes()];
            java.util.Arrays.fill(items, -1);
            items[att] = value;
            single.m_items = items;

            result.addElement(single);
        }
    }
    return result;
}
From source file:cba.ItemSet.java
License:Open Source License
/**
 * Turns the header information of the given instances into single-item item sets, one
 * per attribute value. Item sets are produced attribute by attribute and, within an
 * attribute, in the order of its values, which fixes the lexicographic order.
 *
 * @param instances the set of instances whose header info is to be used
 * @return a set of item sets, each containing a single item
 * @exception Exception if a numeric attribute is encountered
 */
public static FastVector singletons(Instances instances) throws Exception {
    FastVector result = new FastVector();

    for (int att = 0; att < instances.numAttributes(); att++) {
        if (instances.attribute(att).isNumeric()) {
            throw new Exception("Can't handle numeric attributes!");
        }

        for (int value = 0; value < instances.attribute(att).numValues(); value++) {
            ItemSet single = new ItemSet(instances.numInstances());

            // -1 marks "no item" for every attribute except the current one.
            int[] items = new int[instances.numAttributes()];
            java.util.Arrays.fill(items, -1);
            items[att] = value;
            single.m_items = items;

            result.addElement(single);
        }
    }
    return result;
}
From source file:cba.ItemSet.java
License:Open Source License
/** * Updates counters for a set of item sets and a set of instances. * * @param itemSets the set of item sets which are to be updated * @param instances the instances to be used for updating the counters *///from w w w. ja va 2 s . c o m public static void upDateCounters(FastVector itemSets, Instances instances) { for (int i = 0; i < instances.numInstances(); i++) { Enumeration enu = itemSets.elements(); while (enu.hasMoreElements()) ((ItemSet) enu.nextElement()).upDateCounter(instances.instance(i)); } }
From source file:cba.RuleItem.java
License:Open Source License
/** * Constructs a new RuleItem if the support of the given rule is above the support threshold. * @param premise the premise/*from w ww. ja va 2 s. c o m*/ * @param consequence the consequence * @param instances the instances * @param genTime the time of generation of the current premise and consequence * @param minRuleCount the support threshold * @param m_midPoints the mid points of the intervals * @param m_priors the estimated priori probabilities (in a hashtable) * @return a RuleItem if its support is above the threshold, null otherwise */ public RuleItem generateRuleItem(ItemSet premise, ItemSet consequence, Instances instances, int genTime, int minRuleCount, double[] m_midPoints, Hashtable m_priors) { ItemSet rule = new ItemSet(instances.numInstances()); rule.m_items = new int[(consequence.m_items).length]; System.arraycopy(premise.m_items, 0, rule.m_items, 0, (premise.m_items).length); for (int k = 0; k < consequence.m_items.length; k++) { if (consequence.m_items[k] != -1) rule.m_items[k] = consequence.m_items[k]; } for (int i = 0; i < instances.numInstances(); i++) rule.upDateCounter(instances.instance(i)); int ruleSupport = rule.support(); if (ruleSupport > minRuleCount) { RuleItem newRule = new RuleItem(premise, consequence, genTime, ruleSupport, m_midPoints, m_priors); return newRule; } return null; }
From source file:cerebro.Id3.java
License:Open Source License
/** * Method for building an Id3 tree.//w ww . jav a2 s.c o m * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }