List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:TreeNode.java
License:Common Public License
public double calculateEntropy(Instances instances) { if (instances.numClasses() <= 1) return 0; else {/* w ww . jav a 2s. c o m*/ int numInstances = instances.numInstances(); int numClasses = instances.numClasses(); //Count how many in each class int[] classCounts = new int[numClasses]; for (int i = 0; i < numInstances; i++) { classCounts[(int) instances.instance(i).classValue()]++; } //Calculate the entropy double entropy = 0; double quotient; for (int i = 0; i < numClasses; i++) { double result; if (classCounts[i] == 0) { result = 0; } else { quotient = (double) classCounts[i] / (double) numInstances; result = (quotient * Math.log(quotient) / Math.log(numClasses)); assert (Double.isNaN(result) && result <= 1); } entropy = entropy - result; } return entropy; } }
From source file:Pair.java
License:Open Source License
/** * Boosting method./* ww w.ja v a 2 s . com*/ * * @param data the training data to be used for generating the * boosted classifier. * @exception Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { super.buildClassifier(data); if (data.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Cannot handle string attributes!"); } data = new Instances(data); data.deleteWithMissingClass(); if (data.numInstances() == 0) { throw new Exception("No train instances without class missing!"); } if (!data.classAttribute().isNumeric()) { throw new UnsupportedClassTypeException("TrAdaBoostR2 can only handle a numeric class!"); } if (m_SourceInstances == null) { throw new Exception("Source data has not been specified!"); } m_NumClasses = data.numClasses(); try { doCV(data); } catch (Exception e) { e.printStackTrace(); } }
From source file:Pair.java
License:Open Source License
private void doCV(Instances targetData) throws Exception { System.out.println();/* w w w. ja v a 2s . c o m*/ System.out.flush(); int numSourceInstances = m_SourceInstances.numInstances(); int numInstances = targetData.numInstances() + numSourceInstances; numTargetInstances = numInstances - numSourceInstances; double weightSource, weightTarget; double initialSourceFraction; double[] weights = new double[numInstances]; Random randomInstance = new Random(1); Instances data = new Instances(m_SourceInstances, 0, numSourceInstances); // Now add the target data, shallow copying the instances as they are added // so it doesn't mess up the weights for anyone else Enumeration enumer = targetData.enumerateInstances(); while (enumer.hasMoreElements()) { Instance instance = (Instance) enumer.nextElement(); data.add(instance); } if (sourceRatio < 0) { //weight all equally weightSource = weightTarget = 1.0/*/numInstances*/; initialSourceFraction = numSourceInstances / (double) numInstances; } else { double totalWeight = 1 + sourceRatio; weightSource = sourceRatio / totalWeight/*/numSourceInstances*/; weightTarget = 1.0 / totalWeight/*/numTargetInstances*/; initialSourceFraction = weightSource; } for (int j = 0; j < numInstances; j++) { Instance instance = data.instance(j); if (j < numSourceInstances) instance.setWeight(weightSource); else instance.setWeight(weightTarget); } if (doFraction) { for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) { sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; //[same weights as regular] if (sourceFraction > .995) sourceFraction = .995; //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances; double sourceWeight = (sourceFraction * numTargetInstances) / (numSourceInstances * (1 - sourceFraction)); for (int j = 0; j < numInstances; j++) { Instance instance = data.instance(j); if (j < numSourceInstances) instance.setWeight(sourceWeight); else instance.setWeight(1); } 
buildClassifierWithWeights(data); System.out.println("Iteration " + it + ":" + getTestError()); } } else { for (int i = 0; i < numInstances; i++) weights[i] = data.instance(i).weight(); buildClassifierWithWeights(data); System.out.println("Iteration -1:" + getTestError()); for (int i = 0; i < numInstances; i++) data.instance(i).setWeight(weights[i]); for (int it = 0; it < sourceIterations; it++) { Instances sample = null; if (!resample || m_NumIterationsPerformed == 0) { sample = data; } else { double sum = data.sumOfWeights(); double[] sweights = new double[data.numInstances()]; for (int i = 0; i < sweights.length; i++) { sweights[i] = data.instance(i).weight() / sum; } sample = data.resampleWithWeights(randomInstance, sweights); } try { m_Classifiers[it].buildClassifier(sample); } catch (Exception e) { e.printStackTrace(); System.out.println("E: " + e); } sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations); setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false); for (int i = 0; i < numInstances; i++) weights[i] = data.instance(i).weight(); buildClassifierWithWeights(data); System.out.println("Iteration " + it + ":" + getTestError()); for (int i = 0; i < numInstances; i++) data.instance(i).setWeight(weights[i]); } } }
From source file:Pair.java
License:Open Source License
/** * Boosting method. Boosts any classifier that can handle weighted * instances./*w w w.j a v a 2 s.c om*/ * * @param data the training data to be used for generating the * boosted classifier. * @exception Exception if the classifier could not be built successfully */ protected void buildClassifierWithWeights(Instances data) throws Exception { Random randomInstance = new Random(0); double epsilon, reweight, beta = 0; Evaluation evaluation; Instances sample; // Initialize data m_Betas = new double[m_Classifiers.length]; m_NumIterationsPerformed = 0; int numSourceInstances = m_SourceInstances.numInstances(); // Do boostrap iterations for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) { // Build the classifier sample = null; if (!resample || m_NumIterationsPerformed == 0) { sample = data; } else { double sum = data.sumOfWeights(); double[] weights = new double[data.numInstances()]; for (int i = 0; i < weights.length; i++) { weights[i] = data.instance(i).weight() / sum; } sample = data.resampleWithWeights(randomInstance, weights); if (doSampleSize) { int effectiveInstances = (int) (sourceFraction * weights.length + numTargetInstances); if (effectiveInstances > numSourceInstances + numTargetInstances) effectiveInstances = numSourceInstances + numTargetInstances; //System.out.println(effectiveInstances); sample.randomize(randomInstance); Instances q = new Instances(sample, 0, effectiveInstances); sample = q; } } try { m_Classifiers[m_NumIterationsPerformed].buildClassifier(sample); } catch (Exception e) { e.printStackTrace(); System.out.println("E: " + e); } if (doBagging) beta = 0.4 / .6; //always same beta else beta = setWeights(data, m_Classifiers[m_NumIterationsPerformed], -1, numSourceInstances, true); // Stop if error too small or error too big and ignore this model if (beta < 0) { //setWeights indicates a problem with negative beta if (m_NumIterationsPerformed == 0) { m_NumIterationsPerformed = 1; // 
If we're the first we have to to use it } break; } // Determine the weight to assign to this model m_Betas[m_NumIterationsPerformed] = Math.log(1 / beta); } betaSum = 0; for (int i = 0; i < m_NumIterationsPerformed; i++) betaSum += m_Betas[i]; }
From source file:Pair.java
License:Open Source License
/** * Sets the weights for the next iteration. *//*from www .ja v a2 s .co m*/ protected double setWeights(Instances trainData, Classifier cls, double sourceFraction, int numSourceInstances, boolean isFinal) throws Exception { Enumeration enu = trainData.enumerateInstances(); int instNum = 0; double[] errors = new double[trainData.numInstances()]; double max = 0; int i = 0; while (enu.hasMoreElements()) { Instance instance = (Instance) enu.nextElement(); errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue()); if (i >= numSourceInstances && errors[i] > max) max = errors[i]; i++; } if (max == 0) return -1; //get avg loss double loss = 0; double initialTWeightSum = 0; double allWeightSum = 0; for (int j = 0; j < errors.length; j++) { errors[j] /= max; Instance instance = trainData.instance(j); loss += instance.weight() * errors[j]; if (j >= numSourceInstances) { //loss += instance.weight() * errors[j]; initialTWeightSum += instance.weight(); } allWeightSum += instance.weight(); } //loss /= weightSum; loss /= allWeightSum; targetWeight = initialTWeightSum / allWeightSum; /* if (!isFinal){ System.out.println("Target weight: " + targetWeight); System.out.println("max: " + max); System.out.println("avg error: " + loss * max); System.out.println("Loss: " + loss); } */ double beta; if (fixedBeta) beta = 0.4 / 0.6; else { if (isFinal && loss > 0.499)//bad, so quit //return -1; loss = 0.499; //since we're doing CV, no reason to quit beta = loss / (1 - loss); //or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2; } double tWeightSum = 0; if (!isFinal) { //need to find b so that weight of source be sourceFraction*num source //do binary search double goal = sourceFraction * errors.length; double bMin = .001; double bMax = .999; double b; double sourceSum = 0; while (bMax - bMin > .001) { b = (bMax + bMin) / 2; double sum = 0; for (int j = 0; j < numSourceInstances; j++) { Instance instance = trainData.instance(j); sum += Math.pow(b, 
errors[j]) * instance.weight(); } if (sum > goal) bMax = b; else bMin = b; } b = (bMax + bMin) / 2; //System.out.println(b); for (int j = 0; j < numSourceInstances; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * Math.pow(bMin, errors[j])); sourceSum += instance.weight(); } //now adjust target weights goal = errors.length - sourceSum; double m = goal / initialTWeightSum; for (int j = numSourceInstances; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * m); } } else {//final if (!doUpsource) { //modify only target weights for (int j = numSourceInstances; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * Math.pow(beta, -errors[j])); tWeightSum += instance.weight(); } double weightSumInverse = initialTWeightSum / tWeightSum; for (int j = numSourceInstances; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * weightSumInverse); } } else { //modify all weights for (int j = 0; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * Math.pow(beta, -errors[j])); tWeightSum += instance.weight(); } double weightSumInverse = errors.length / tWeightSum; for (int j = 0; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * weightSumInverse); } } } return beta; }
From source file:classificationPLugin.java
private void ClassifyActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ClassifyActionPerformed this.name = txtdirecotry2.getText(); System.out.println(this.name); try {/* w w w. j ava 2 s . c o m*/ CSVLoader loader = new CSVLoader(); loader.setSource(new File(this.name)); Instances data = loader.getDataSet(); System.out.println(data); // save ARFF String arffile = this.name + ".arff"; System.out.println(arffile); ArffSaver saver = new ArffSaver(); saver.setInstances(data); saver.setFile(new File(arffile)); saver.writeBatch(); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } try { FileReader reader = new FileReader(this.name + ".arff"); BufferedReader br = new BufferedReader(reader); instance.read(br, null); br.close(); instance.requestFocus(); } catch (Exception e2) { System.out.println(e2); } Instances data; try { data = new Instances(new BufferedReader(new FileReader(this.name + ".arff"))); Instances newData = null; Add filter; newData = new Instances(data); filter = new Add(); filter.setAttributeIndex("last"); filter.setNominalLabels("rods,punctua,networks"); filter.setAttributeName("target"); filter.setInputFormat(newData); newData = Filter.useFilter(newData, filter); System.out.print(newData); Vector vec = new Vector(); newData.setClassIndex(newData.numAttributes() - 1); if (!newData.equalHeaders(newData)) { throw new IllegalArgumentException("Train and test are not compatible!"); } URL urlToModel = this.getClass().getResource("/" + "Final.model"); InputStream stream = urlToModel.openStream(); Classifier cls = (Classifier) weka.core.SerializationHelper.read(stream); System.out.println("PROVANT MODEL.classifyInstance"); for (int i = 0; i < newData.numInstances(); i++) { double pred = cls.classifyInstance(newData.instance(i)); double[] dist = cls.distributionForInstance(newData.instance(i)); System.out.print((i + 1) + " - "); System.out.print(newData.classAttribute().value((int) 
pred) + " - "); //txtarea2.setText(Utils.arrayToString(dist)); System.out.println(Utils.arrayToString(dist)); vec.add(newData.classAttribute().value((int) pred)); } int p = 0, n = 0, r = 0; //txtarea2.append(Utils.arrayToString(this.target)); for (Object vec1 : vec) { if ("rods".equals(vec1.toString())) { r = r + 1; } if ("punctua".equals(vec1.toString())) { p = p + 1; } if ("networks".equals(vec1.toString())) { n = n + 1; } PrintWriter out = null; try { out = new PrintWriter(this.name + "_morphology.txt"); out.println(vec); out.close(); } catch (Exception ex) { ex.printStackTrace(); } //System.out.println(vec.get(i)); } System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n); IJ.showMessage( "Your file:" + this.name + "arff" + "\nhas been analysed, and it is composed by-> \npunctua: " + p + ", rods: " + r + ", networks: " + n); classi.setText( "Your file:" + this.name + "arff" + "\nhas been analysed, and it is composed by: \npunctua: " + p + ", rods: " + r + ", networks: " + n); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } IJ.run("Clear Results"); IJ.run("Clear Results"); IJ.run("Close All", ""); if (WindowManager.getFrame("Results") != null) { IJ.selectWindow("Results"); IJ.run("Close"); } if (WindowManager.getFrame("Summary") != null) { IJ.selectWindow("Summary"); IJ.run("Close"); } if (WindowManager.getFrame("Results") != null) { IJ.selectWindow("Results"); IJ.run("Close"); } if (WindowManager.getFrame("ROI Manager") != null) { IJ.selectWindow("ROI Manager"); IJ.run("Close"); } IJ.run("Close All", "roiManager"); IJ.run("Close All", ""); }
From source file:REPRandomTree.java
License:Open Source License
/** * Builds classifier./*from w ww .j a v a 2 s . c o m*/ * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. 
sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:LabeledItemSet.java
License:Open Source License
/** * Converts the header info of the given set of instances into a set * of item sets (singletons). The ordering of values in the header file * determines the lexicographic order. Each item set knows its class label. * @return a set of item sets, each containing a single item * @param instancesNoClass instances without the class attribute * @param classes the values of the class attribute sorted according to instances * @exception Exception if singletons can't be generated successfully */// w ww . j av a 2 s . c o m public static FastVector singletons(Instances instancesNoClass, Instances classes) throws Exception { FastVector cSet, setOfItemSets = new FastVector(); LabeledItemSet current; //make singletons for (int i = 0; i < instancesNoClass.numAttributes(); i++) { if (instancesNoClass.attribute(i).isNumeric()) throw new Exception("Can't handle numeric attributes!"); for (int j = 0; j < instancesNoClass.attribute(i).numValues(); j++) { for (int k = 0; k < (classes.attribute(0)).numValues(); k++) { current = new LabeledItemSet(instancesNoClass.numInstances(), k); current.m_items = new int[instancesNoClass.numAttributes()]; for (int l = 0; l < instancesNoClass.numAttributes(); l++) current.m_items[l] = -1; current.m_items[i] = j; setOfItemSets.addElement(current); } } } return setOfItemSets; }
From source file:LabeledItemSet.java
License:Open Source License
/** * Updates counter of a specific item set * @param itemSets an item sets/*ww w .ja v a 2 s .c o m*/ * @param instancesNoClass instances without the class attribute * @param instancesClass the values of the class attribute sorted according to instances */ public static void upDateCounters(FastVector itemSets, Instances instancesNoClass, Instances instancesClass) { for (int i = 0; i < instancesNoClass.numInstances(); i++) { Enumeration enu = itemSets.elements(); while (enu.hasMoreElements()) ((LabeledItemSet) enu.nextElement()).upDateCounter(instancesNoClass.instance(i), instancesClass.instance(i)); } }
From source file:dialog1.java
private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed try {//from ww w . ja va 2 s . co m CSVLoader loader = new CSVLoader(); loader.setSource(new File(txtfilename.getText() + "_complete.csv")); Instances data = loader.getDataSet(); System.out.println(data); // save ARFF String arffile = this.name3 + ".arff"; System.out.println(arffile); ArffSaver saver = new ArffSaver(); saver.setInstances(data); saver.setFile(new File(arffile)); saver.writeBatch(); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } Instances data; try { data = new Instances(new BufferedReader(new FileReader(this.name3 + ".arff"))); Instances newData = null; Add filter; newData = new Instances(data); filter = new Add(); filter.setAttributeIndex("last"); filter.setNominalLabels("rods,punctua,networks"); filter.setAttributeName("target"); filter.setInputFormat(newData); newData = Filter.useFilter(newData, filter); System.out.print(newData); Vector vec = new Vector(); newData.setClassIndex(newData.numAttributes() - 1); if (!newData.equalHeaders(newData)) { throw new IllegalArgumentException("Train and test are not compatible!"); } /*URL urlToModel = this.getClass().getResource("/" + "Final.model"); InputStream stream = urlToModel.openStream();*/ InputStream stream = this.getClass().getResourceAsStream("/" + "Final.model"); Classifier cls = (Classifier) weka.core.SerializationHelper.read(stream); System.out.println("PROVANT MODEL.classifyInstance"); for (int i = 0; i < newData.numInstances(); i++) { double pred = cls.classifyInstance(newData.instance(i)); double[] dist = cls.distributionForInstance(newData.instance(i)); System.out.print((i + 1) + " - "); System.out.print(newData.classAttribute().value((int) pred) + " - "); //txtarea2.setText(Utils.arrayToString(dist)); System.out.println(Utils.arrayToString(dist)); vec.add(newData.classAttribute().value((int) pred)); 
//txtarea2.append(Utils.arrayToString(newData.classAttribute().value((int) pred))); //this.target2.add((i + 1) + " -); //this.target.add(newData.classAttribute().value((int) pred)); //for (String s : this.list) { //this.target2 += s + ","; } int p = 0, n = 0, r = 0; //txtarea2.append(Utils.arrayToString(this.target)); for (Object vec1 : vec) { if ("rods".equals(vec1.toString())) { r = r + 1; } if ("punctua".equals(vec1.toString())) { p = p + 1; } if ("networks".equals(vec1.toString())) { n = n + 1; } PrintWriter out = null; try { out = new PrintWriter(this.name3 + "_morphology.txt"); out.println(vec); out.close(); } catch (Exception ex) { ex.printStackTrace(); } //System.out.println(vec.get(i)); } System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n); IJ.showMessage( "Your file:" + this.name3 + "arff" + "\nhas been analysed, and it is composed by-> punctua: " + p + ", rods: " + r + ", networks: " + n); //txtarea2.setText("Your file:" + this.name3 + ".arff" //+ "\nhas been analysed, and it is composed by-> punctua: " + p + ", rods: " + r + ", networks: " + n //+ "\n" //+ "\nAnalyse complete"); //txtarea.setText("Analyse complete"); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } IJ.run("Clear Results"); IJ.run("Clear Results"); IJ.run("Close All", ""); if (WindowManager.getFrame("Results") != null) { IJ.selectWindow("Results"); IJ.run("Close"); } if (WindowManager.getFrame("Summary") != null) { IJ.selectWindow("Summary"); IJ.run("Close"); } if (WindowManager.getFrame("Results") != null) { IJ.selectWindow("Results"); IJ.run("Close"); } if (WindowManager.getFrame("ROI Manager") != null) { IJ.selectWindow("ROI Manager"); IJ.run("Close"); } IJ.run("Close All", "roiManager"); IJ.run("Close All", ""); setVisible(false); dispose();// TODO add your handling code here: 
setVisible(false); dispose();// TODO add your handling code here: // TODO add your handling code here: }