List of usage examples for the weka.core.Instances constructor
public Instances(Instances dataset)
From source file:Bilbo.java
License:Open Source License
/** * Bagging method.//from w ww. j av a 2s . com * * @param data the training data to be used for generating the * bagged classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data, Instances p_unlabeledData) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // Has user asked to represent copies using weights? if (getRepresentCopiesUsingWeights() && !(m_Classifier instanceof WeightedInstancesHandler)) { throw new IllegalArgumentException("Cannot represent copies using weights when " + "base learner in bagging does not implement " + "WeightedInstancesHandler."); } // get fresh Instances object m_data = new Instances(data); m_unlabeledData = new Instances(p_unlabeledData); super.buildClassifier(m_data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } m_random = new Random(m_Seed); m_inBag = null; if (m_CalcOutOfBag) m_inBag = new boolean[m_Classifiers.length][]; for (int j = 0; j < m_Classifiers.length; j++) { if (m_Classifier instanceof Randomizable) { ((Randomizable) m_Classifiers[j]).setSeed(m_random.nextInt()); } } //Insert oracle loop here TODO buildClassifiers(); Instances inst = new Instances(m_data); for (int i = 0; i < m_Classifiers.length; i++) { inst.clear(); ((NewTree) m_Classifiers[i]).GetTransductedInstances(inst); ((NewTree) m_Classifiers[i]).DoInduction(inst); // Ehm, do something boyski } // calc OOB error? 
if (getCalcOutOfBag()) { double outOfBagCount = 0.0; double errorSum = 0.0; boolean numeric = m_data.classAttribute().isNumeric(); for (int i = 0; i < m_data.numInstances(); i++) { double vote; double[] votes; if (numeric) votes = new double[1]; else votes = new double[m_data.numClasses()]; // determine predictions for instance int voteCount = 0; for (int j = 0; j < m_Classifiers.length; j++) { if (m_inBag[j][i]) continue; if (numeric) { double pred = ((NewTree) m_Classifiers[j]).classifyInstance(m_data.instance(i)); if (!Utils.isMissingValue(pred)) { votes[0] += pred; voteCount++; } } else { voteCount++; double[] newProbs = ((NewTree) m_Classifiers[j]) .distributionForInstance(m_data.instance(i)); // average the probability estimates for (int k = 0; k < newProbs.length; k++) { votes[k] += newProbs[k]; } } } // "vote" if (numeric) { if (voteCount == 0) { vote = Utils.missingValue(); } else { vote = votes[0] / voteCount; // average } } else { if (Utils.eq(Utils.sum(votes), 0)) { vote = Utils.missingValue(); } else { vote = Utils.maxIndex(votes); // predicted class Utils.normalize(votes); } } // error for instance if (!Utils.isMissingValue(vote) && !m_data.instance(i).classIsMissing()) { outOfBagCount += m_data.instance(i).weight(); if (numeric) { errorSum += (StrictMath.abs(vote - m_data.instance(i).classValue()) * m_data.instance(i).weight()) / m_data.instance(i).classValue(); } else { if (vote != m_data.instance(i).classValue()) errorSum += m_data.instance(i).weight(); } } } if (outOfBagCount > 0) { m_OutOfBagError = errorSum / outOfBagCount; } } else { m_OutOfBagError = 0; } // save memory m_data = null; }
From source file:classifyfromimage.java
private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed this.name3 = IJ.getImage().getTitle(); this.name4 = this.name3.replaceFirst("[.][^.]+$", ""); System.out.println("hola " + this.name4); selectWindow(this.name3); System.out.println(this.name4); System.out.println(this.name3); RoiManager rm = RoiManager.getInstance(); IJ.run("Duplicate...", this.name3); IJ.run("Set Measurements...", "area perimeter fit shape limit scientific redirect=None decimal=5"); selectWindow(this.name3); IJ.run("Subtract Background...", "rolling=1.5"); IJ.run("Enhance Contrast...", "saturated=25 equalize"); IJ.run("Subtract Background...", "rolling=1.5"); IJ.run("Convolve...", "text1=[-1 -3 -4 -3 -1\n-3 0 6 0 -3\n-4 6 50 6 -4\n-3 0 6 0 -3\n-1 -3 -4 -3 -1\n] normalize"); IJ.run("8-bit", ""); IJ.run("Restore Selection", ""); IJ.run("Make Binary", ""); Prefs.blackBackground = false;//from ww w . j ava2 s .c o m IJ.run("Convert to Mask", ""); IJ.run("Restore Selection", ""); this.valor1 = this.interval3.getText(); this.valor2 = this.interval4.getText(); System.out.println("VECTOR-> punctua: " + this.valor1 + " " + this.valor2); this.text = "size=" + this.valor1 + "-" + this.valor2 + " pixel show=Outlines display include summarize add"; IJ.run("Analyze Particles...", this.text); IJ.saveAs("tif", this.name3 + "_processed"); String dest_filename1, dest_filename2, full; selectWindow("Results"); //dest_filename1 = this.name2 + "_complete.txt"; dest_filename2 = this.name3 + "_complete.csv"; //IJ.saveAs("Results", prova + File.separator + dest_filename1); IJ.run("Input/Output...", "jpeg=85 gif=-1 file=.csv copy_row save_column save_row"); //IJ.saveAs("Results", dir + File.separator + dest_filename2); IJ.saveAs("Results", this.name3 + "_complete.csv"); IJ.run("Restore Selection"); IJ.run("Clear Results"); //txtarea.setText("Converting, please wait... 
"); try { CSVLoader loader = new CSVLoader(); loader.setSource(new File(this.name3 + "_complete.csv")); Instances data = loader.getDataSet(); System.out.println(data); // save ARFF String arffile = this.name3 + ".arff"; System.out.println(arffile); ArffSaver saver = new ArffSaver(); saver.setInstances(data); saver.setFile(new File(arffile)); saver.writeBatch(); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } //txtdata2.setText(this.name3); //txtarea.setText("Succesfully converted " + this.name3); //txtarea.setText("Analysing your data, please wait... "); Instances data; try { data = new Instances(new BufferedReader(new FileReader(this.name3 + ".arff"))); Instances newData = null; Add filter; newData = new Instances(data); filter = new Add(); filter.setAttributeIndex("last"); filter.setNominalLabels("rods,punctua,networks"); filter.setAttributeName("target"); filter.setInputFormat(newData); newData = Filter.useFilter(newData, filter); System.out.print(newData); Vector vec = new Vector(); newData.setClassIndex(newData.numAttributes() - 1); if (!newData.equalHeaders(newData)) { throw new IllegalArgumentException("Train and test are not compatible!"); } URL urlToModel = this.getClass().getResource("/" + "Final.model"); InputStream stream = urlToModel.openStream(); Classifier cls = (Classifier) weka.core.SerializationHelper.read(stream); System.out.println("PROVANT MODEL.classifyInstance"); for (int i = 0; i < newData.numInstances(); i++) { double pred = cls.classifyInstance(newData.instance(i)); double[] dist = cls.distributionForInstance(newData.instance(i)); System.out.print((i + 1) + " - "); System.out.print(newData.classAttribute().value((int) pred) + " - "); //txtarea2.setText(Utils.arrayToString(dist)); System.out.println(Utils.arrayToString(dist)); vec.add(newData.classAttribute().value((int) pred)); } int p = 0, n = 0, r = 0; //txtarea2.append(Utils.arrayToString(this.target)); for (Object vec1 
: vec) { if ("rods".equals(vec1.toString())) { r = r + 1; } if ("punctua".equals(vec1.toString())) { p = p + 1; } if ("networks".equals(vec1.toString())) { n = n + 1; } PrintWriter out = null; try { out = new PrintWriter(this.name3 + "_morphology.txt"); out.println(vec); out.close(); } catch (Exception ex) { ex.printStackTrace(); } //System.out.println(vec.get(i)); } System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n); IJ.showMessage( "Your file:" + this.name3 + "arff" + "\nhas been analysed, and it is composed by-> punctua: " + p + ", rods: " + r + ", networks: " + n); this.txtarea2.setText( "Your file:" + this.name3 + ".arff" + "\nhas been analysed, and it is composed by-> punctua: " + p + ", rods: " + r + ", networks: " + n); A_MachineLearning nf1 = new A_MachineLearning(); A_MachineLearning.txtresults1.setText(this.txtarea2.getText()); A_MachineLearning.txtresults1.setText(this.txtarea2.getText()); A_MachineLearning.txtresults1.setText(this.txtarea2.getText()); A_MachineLearning.txtresults1.append(this.txtarea2.getText()); A_MachineLearning.txtresults1.append(this.txtarea2.getText()); A_MachineLearning.txtresults1.append(this.txtarea2.getText()); nf1.setVisible(true); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } IJ.run("Clear Results"); //IJ.RoiManager("Delete"); IJ.run("Clear Results"); IJ.run("Close All", ""); if (WindowManager.getFrame("Results") != null) { IJ.selectWindow("Results"); IJ.run("Close"); } if (WindowManager.getFrame("Summary") != null) { IJ.selectWindow("Summary"); IJ.run("Close"); } if (WindowManager.getFrame("Results") != null) { IJ.selectWindow("Results"); IJ.run("Close"); } if (WindowManager.getFrame("ROI Manager") != null) { IJ.selectWindow("ROI Manager"); IJ.run("Close"); } IJ.run("Close All", "roiManager"); IJ.run("Close All", 
""); setVisible(false); dispose();// TODO add your handling code here: setVisible(false); dispose();// TODO add your handling code here: // TODO add your handling code here: }
From source file:ExperimentDemo.java
License:Open Source License
/** * Expects the following parameters: //w w w. j a v a 2 s. c o m * <ul> * <li>-classifier "classifier incl. parameters"</li> * <li>-exptype "classification|regression"</li> * <li>-splittype "crossvalidation|randomsplit"</li> * <li>-runs "# of runs"</li> * <li>-folds "# of cross-validation folds"</li> * <li>-percentage "percentage for randomsplit"</li> * <li>-result "arff file for storing the results"</li> * <li>-t "dataset" (can be supplied multiple times)</li> * </ul> * * @param args the commandline arguments * @throws Exception if something goes wrong */ public static void main(String[] args) throws Exception { // parameters provided? if (args.length == 0) { System.out.println("\nUsage: weka.examples.experiment.ExperimentDemo\n" + "\t -classifier <classifier incl. parameters>\n" + "\t -exptype <classification|regression>\n" + "\t -splittype <crossvalidation|randomsplit>\n" + "\t -runs <# of runs>\n" + "\t -folds <folds for CV>\n" + "\t -percentage <percentage for randomsplit>\n" + "\t -result <ARFF file for storing the results>\n" + "\t -t dataset (can be supplied multiple times)\n"); System.exit(1); } // 1. setup the experiment System.out.println("Setting up..."); Experiment exp = new Experiment(); exp.setPropertyArray(new Classifier[0]); exp.setUsePropertyIterator(true); String option; // classification or regression option = Utils.getOption("exptype", args); if (option.length() == 0) throw new IllegalArgumentException("No experiment type provided!"); SplitEvaluator se = null; /* * Interface to objects able to generate a fixed set of results for a particular split of a dataset. * The set of results should contain fields related to any settings of the SplitEvaluator (not including the dataset name. * For example, one field for the classifier used to get the results, another for the classifier options, etc). * Possible implementations of SplitEvaluator: StdClassification results, StdRegression results. 
*/ Classifier sec = null; boolean classification = false; if (option.equals("classification")) { classification = true; se = new ClassifierSplitEvaluator(); /* * A SplitEvaluator that produces results for a classification scheme on a nominal class attribute. */ sec = ((ClassifierSplitEvaluator) se).getClassifier(); } else if (option.equals("regression")) { se = new RegressionSplitEvaluator(); sec = ((RegressionSplitEvaluator) se).getClassifier(); } else { throw new IllegalArgumentException("Unknown experiment type '" + option + "'!"); } // crossvalidation or randomsplit option = Utils.getOption("splittype", args); if (option.length() == 0) throw new IllegalArgumentException("No split type provided!"); if (option.equals("crossvalidation")) { CrossValidationResultProducer cvrp = new CrossValidationResultProducer(); /* * Generates for each run, carries out an n-fold cross-validation, using the set SplitEvaluator to generate some results. * If the class attribute is nominal, the dataset is stratified. Results for each fold are generated, so you may wish to use * this in addition with an AveragingResultProducer to obtain averages for each run. */ option = Utils.getOption("folds", args); if (option.length() == 0) throw new IllegalArgumentException("No folds provided!"); cvrp.setNumFolds(Integer.parseInt(option)); cvrp.setSplitEvaluator(se); PropertyNode[] propertyPath = new PropertyNode[2]; /* * Stores information on a property of an object: the class of the object with the property; * the property descriptor, and the current value. 
*/ try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class), CrossValidationResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(cvrp); exp.setPropertyPath(propertyPath); } else if (option.equals("randomsplit")) { RandomSplitResultProducer rsrp = new RandomSplitResultProducer(); rsrp.setRandomizeData(true); option = Utils.getOption("percentage", args); if (option.length() == 0) throw new IllegalArgumentException("No percentage provided!"); rsrp.setTrainPercent(Double.parseDouble(option)); rsrp.setSplitEvaluator(se); PropertyNode[] propertyPath = new PropertyNode[2]; try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class), RandomSplitResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(rsrp); exp.setPropertyPath(propertyPath); } else { throw new IllegalArgumentException("Unknown split type '" + option + "'!"); } // runs option = Utils.getOption("runs", args); if (option.length() == 0) throw new IllegalArgumentException("No runs provided!"); exp.setRunLower(1); exp.setRunUpper(Integer.parseInt(option)); // classifier option = Utils.getOption("classifier", args); if (option.length() == 0) throw new IllegalArgumentException("No classifier provided!"); String[] options = Utils.splitOptions(option); String classname = options[0]; options[0] = ""; Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options); exp.setPropertyArray(new Classifier[] { c }); // datasets boolean data = false; DefaultListModel model = new DefaultListModel(); do { option = Utils.getOption("t", args); if (option.length() > 0) { File 
file = new File(option); if (!file.exists()) throw new IllegalArgumentException("File '" + option + "' does not exist!"); data = true; model.addElement(file); } } while (option.length() > 0); if (!data) throw new IllegalArgumentException("No data files provided!"); exp.setDatasets(model); // result option = Utils.getOption("result", args); if (option.length() == 0) throw new IllegalArgumentException("No result file provided!"); InstancesResultListener irl = new InstancesResultListener(); irl.setOutputFile(new File(option)); exp.setResultListener(irl); // 2. run experiment System.out.println("Initializing..."); exp.initialize(); System.out.println("Running..."); exp.runExperiment(); System.out.println("Finishing..."); exp.postProcess(); // 3. calculate statistics and output them System.out.println("Evaluating..."); PairedTTester tester = new PairedCorrectedTTester(); /* * Calculates T-Test statistics on data stored in a set of instances. */ Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile()))); tester.setInstances(result); tester.setSortColumn(-1); tester.setRunColumn(result.attribute("Key_Run").index()); if (classification) tester.setFoldColumn(result.attribute("Key_Fold").index()); tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1))); tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + "," + (result.attribute("Key_Scheme_options").index() + 1) + "," + (result.attribute("Key_Scheme_version_ID").index() + 1))); tester.setResultMatrix(new ResultMatrixPlainText()); tester.setDisplayedResultsets(null); tester.setSignificanceLevel(0.05); tester.setShowStdDevs(true); // fill result matrix (but discarding the output) if (classification) tester.multiResultsetFull(0, result.attribute("Percent_correct").index()); else tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index()); // output results for reach dataset 
System.out.println("\nResult:"); ResultMatrix matrix = tester.getResultMatrix(); for (int i = 0; i < matrix.getColCount(); i++) { System.out.println(matrix.getColName(i)); System.out.println(" Perc. correct: " + matrix.getMean(i, 0)); System.out.println(" StdDev: " + matrix.getStdDev(i, 0)); } }
From source file:PrincipalComponents.java
License:Open Source License
private void buildAttributeConstructor(Instances data) throws Exception { m_eigenvalues = null;/*from w w w .j av a 2 s. co m*/ m_outputNumAtts = -1; m_attributeFilter = null; m_nominalToBinFilter = null; m_sumOfEigenValues = 0.0; m_trainInstances = new Instances(data); // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_trainHeader = new Instances(m_trainInstances, 0); m_replaceMissingFilter = new ReplaceMissingValues(); m_replaceMissingFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter); /* * if (m_normalize) { m_normalizeFilter = new Normalize(); * m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances * = Filter.useFilter(m_trainInstances, m_normalizeFilter); } */ m_nominalToBinFilter = new NominalToBinary(); m_nominalToBinFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_nominalToBinFilter); // delete any attributes with only one distinct value or are all missing Vector<Integer> deleteCols = new Vector<Integer>(); for (int i = 0; i < m_trainInstances.numAttributes(); i++) { if (m_trainInstances.numDistinctValues(i) <= 1) { deleteCols.addElement(new Integer(i)); } } if (m_trainInstances.classIndex() >= 0) { // get rid of the class column m_hasClass = true; m_classIndex = m_trainInstances.classIndex(); deleteCols.addElement(new Integer(m_classIndex)); } // remove columns from the data if necessary if (deleteCols.size() > 0) { m_attributeFilter = new Remove(); int[] todelete = new int[deleteCols.size()]; for (int i = 0; i < deleteCols.size(); i++) { todelete[i] = (deleteCols.elementAt(i)).intValue(); } m_attributeFilter.setAttributeIndicesArray(todelete); m_attributeFilter.setInvertSelection(false); m_attributeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter); } // can evaluator handle the processed 
data ? e.g., enough attributes? getCapabilities().testWithFail(m_trainInstances); m_numInstances = m_trainInstances.numInstances(); m_numAttribs = m_trainInstances.numAttributes(); fillCovariance(); SymmDenseEVD evd = SymmDenseEVD.factorize(m_correlation); m_eigenvectors = Matrices.getArray(evd.getEigenvectors()); m_eigenvalues = evd.getEigenvalues(); /* * for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < * m_numAttribs; j++) { System.err.println(v[i][j] + " "); } * System.err.println(d[i]); } */ // any eigenvalues less than 0 are not worth anything --- change to 0 for (int i = 0; i < m_eigenvalues.length; i++) { if (m_eigenvalues[i] < 0) { m_eigenvalues[i] = 0.0; } } m_sortedEigens = Utils.sort(m_eigenvalues); m_sumOfEigenValues = Utils.sum(m_eigenvalues); m_transformedFormat = setOutputFormat(); if (m_transBackToOriginal) { m_originalSpaceFormat = setOutputFormatOriginal(); // new ordered eigenvector matrix int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes() : m_transformedFormat.numAttributes() - 1; double[][] orderedVectors = new double[m_eigenvectors.length][numVectors + 1]; // try converting back to the original space for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) { for (int j = 0; j < m_numAttribs; j++) { orderedVectors[j][m_numAttribs - i] = m_eigenvectors[j][m_sortedEigens[i]]; } } // transpose the matrix int nr = orderedVectors.length; int nc = orderedVectors[0].length; m_eTranspose = new double[nc][nr]; for (int i = 0; i < nc; i++) { for (int j = 0; j < nr; j++) { m_eTranspose[i][j] = orderedVectors[j][i]; } } } }
From source file:PrincipalComponents.java
License:Open Source License
/**
 * Transforms every instance of the given dataset with the previously built principal
 * components.
 *
 * <p>The result uses the original-space format when {@code m_transBackToOriginal} is
 * set, and the transformed format otherwise.
 *
 * @param data the instances to convert
 * @return a new dataset holding the converted instances
 * @throws Exception if the components have not been built yet or a conversion fails
 */
@Override
public Instances transformedData(Instances data) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("Principal components hasn't been built yet");
    }

    Instances result = new Instances(m_transBackToOriginal ? m_originalSpaceFormat : m_transformedFormat);

    int index = 0;
    while (index < data.numInstances()) {
        result.add(convertInstance(data.instance(index)));
        index++;
    }
    return result;
}
From source file:BaggingImprove.java
/** * Bagging method.//from ww w .j av a2s.c om * * @param data the training data to be used for generating the bagged * classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); //data.deleteWithMissingClass(); super.buildClassifier(data); if (m_CalcOutOfBag && (m_BagSizePercent != 100)) { throw new IllegalArgumentException( "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!"); } //+ System.out.println("Classifier length" + m_Classifiers.length); int bagSize = data.numInstances() * m_BagSizePercent / 100; //+ System.out.println("Bag Size " + bagSize); Random random = new Random(m_Seed); boolean[][] inBag = null; if (m_CalcOutOfBag) { inBag = new boolean[m_Classifiers.length][]; } //+ //inisialisasi nama penamaan model BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt")); for (int j = 0; j < m_Classifiers.length; j++) { Instances bagData = null; // create the in-bag dataset if (m_CalcOutOfBag) { inBag[j] = new boolean[data.numInstances()]; //System.out.println("Inbag1 " + inBag[0][1]); //bagData = resampleWithWeights(data, random, inBag[j]); bagData = data.resampleWithWeights(random, inBag[j]); //System.out.println("num after resample " + bagData.numInstances()); //+ // for (int k = 0; k < bagData.numInstances(); k++) { // System.out.println("Bag Data after resample [calc out bag]" + bagData.instance(k)); // } } else { //+ System.out.println("Not m_Calc out of bag"); System.out.println("Please configure code inside!"); bagData = data.resampleWithWeights(random); if (bagSize < data.numInstances()) { bagData.randomize(random); Instances newBagData = new Instances(bagData, 0, bagSize); bagData = newBagData; } } if (m_Classifier instanceof Randomizable) { //+ System.out.println("Randomizable"); 
((Randomizable) m_Classifiers[j]).setSeed(random.nextInt()); } //write bootstrap into file writer.write("Bootstrap " + j); writer.newLine(); writer.write(bagData.toString()); writer.newLine(); System.out.println("Berhasil menyimpan bootstrap ke file "); System.out.println("Bootstrap " + j + 1); // textarea.append("\nBootsrap " + (j + 1)); //System.out.println("num instance kedua kali "+bagData.numInstances()); for (int b = 1; b < bagData.numInstances(); b++) { System.out.println("" + bagData.instance(b)); // textarea.append("\n" + bagData.instance(b)); } // //+ // build the classifier m_Classifiers[j].buildClassifier(bagData); // //+ // // SerializationHelper serialization = new SerializationHelper(); // serialization.write("KnnData"+model+".model", m_Classifiers[j]); // System.out.println("Finish write into model"); // model++; } writer.flush(); writer.close(); // calc OOB error? if (getCalcOutOfBag()) { double outOfBagCount = 0.0; double errorSum = 0.0; boolean numeric = data.classAttribute().isNumeric(); for (int i = 0; i < data.numInstances(); i++) { double vote; double[] votes; if (numeric) { votes = new double[1]; } else { votes = new double[data.numClasses()]; } // determine predictions for instance int voteCount = 0; for (int j = 0; j < m_Classifiers.length; j++) { if (inBag[j][i]) { continue; } voteCount++; // double pred = m_Classifiers[j].classifyInstance(data.instance(i)); if (numeric) { // votes[0] += pred; votes[0] = m_Classifiers[j].classifyInstance(data.instance(i)); } else { // votes[(int) pred]++; double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i)); //- // for(double a : newProbs) // { // System.out.println("Double new probs %.f "+a); // } // average the probability estimates for (int k = 0; k < newProbs.length; k++) { votes[k] += newProbs[k]; } } } System.out.println("Vote count %d" + voteCount); // "vote" if (numeric) { vote = votes[0]; if (voteCount > 0) { vote /= voteCount; // average } } else { if 
(Utils.eq(Utils.sum(votes), 0)) { } else { Utils.normalize(votes); } vote = Utils.maxIndex(votes); // predicted class //- System.out.println("Vote " + vote); } // error for instance outOfBagCount += data.instance(i).weight(); if (numeric) { errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight(); } else if (vote != data.instance(i).classValue()) { //+ System.out.println("Vote terakhir" + data.instance(i).classValue()); errorSum += data.instance(i).weight(); } } m_OutOfBagError = errorSum / outOfBagCount; } else { m_OutOfBagError = 0; } }
From source file:CrossValidationMultipleRuns.java
License:Open Source License
/** * Performs the cross-validation. See Javadoc of class for information * on command-line parameters./*from ww w . j av a 2s . co m*/ * * @param args the command-line parameters * @throws Exception if something goes wrong */ public static void main(String[] args) throws Exception { // loads data and set class index Instances data = DataSource.read(Utils.getOption("t", args)); String clsIndex = Utils.getOption("c", args); if (clsIndex.length() == 0) clsIndex = "last"; if (clsIndex.equals("first")) data.setClassIndex(0); else if (clsIndex.equals("last")) data.setClassIndex(data.numAttributes() - 1); else data.setClassIndex(Integer.parseInt(clsIndex) - 1); // classifier String[] tmpOptions; String classname; tmpOptions = Utils.splitOptions(Utils.getOption("W", args)); classname = tmpOptions[0]; tmpOptions[0] = ""; Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions); // other options int runs = Integer.parseInt(Utils.getOption("r", args)); int folds = Integer.parseInt(Utils.getOption("x", args)); // perform cross-validation for (int i = 0; i < runs; i++) { // randomize data int seed = i + 1; Random rand = new Random(seed); Instances randData = new Instances(data); randData.randomize(rand); //if (randData.classAttribute().isNominal()) // randData.stratify(folds); Evaluation eval = new Evaluation(randData); StringBuilder optionsString = new StringBuilder(); for (String s : cls.getOptions()) { optionsString.append(s); optionsString.append(" "); } // output evaluation System.out.println(); System.out.println("=== Setup run " + (i + 1) + " ==="); System.out.println("Classifier: " + optionsString.toString()); System.out.println("Dataset: " + data.relationName()); System.out.println("Folds: " + folds); System.out.println("Seed: " + seed); System.out.println(); for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); // build and evaluate classifier Classifier clsCopy = 
Classifier.makeCopy(cls); clsCopy.buildClassifier(train); eval.evaluateModel(clsCopy, test); System.out.println(eval.toClassDetailsString()); } System.out.println( eval.toSummaryString("=== " + folds + "-fold Cross-validation run " + (i + 1) + " ===", false)); } }
From source file:REPTree.java
License:Open Source License
/** * Builds classifier.//from w w w. jav a 2 s . c om * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. 
sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:A_MachineLearning.java
private void jButton7ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton7ActionPerformed Instances data;/*from w ww. ja va 2 s .c o m*/ try { data = new Instances(new BufferedReader(new FileReader(this.file2 + ".arff"))); Instances newData = null; Add filter; newData = new Instances(data); filter = new Add(); filter.setAttributeIndex("last"); filter.setNominalLabels("rods,punctua,networks"); filter.setAttributeName("target"); filter.setInputFormat(newData); newData = Filter.useFilter(newData, filter); System.out.print(newData); Vector vec = new Vector(); newData.setClassIndex(newData.numAttributes() - 1); if (!newData.equalHeaders(newData)) { throw new IllegalArgumentException("Train and test are not compatible!"); } URL urlToModel = this.getClass().getResource("/" + "Final.model"); InputStream stream = urlToModel.openStream(); Classifier cls = (Classifier) weka.core.SerializationHelper.read(stream); System.out.println("PROVANT MODEL.classifyInstance"); for (int i = 0; i < newData.numInstances(); i++) { double pred = cls.classifyInstance(newData.instance(i)); double[] dist = cls.distributionForInstance(newData.instance(i)); System.out.print((i + 1) + " - "); System.out.print(newData.classAttribute().value((int) pred) + " - "); //txtarea2.setText(Utils.arrayToString(dist)); System.out.println(Utils.arrayToString(dist)); vec.add(newData.classAttribute().value((int) pred)); } int p = 0, n = 0, r = 0; //txtarea2.append(Utils.arrayToString(this.target)); for (Object vec1 : vec) { if ("rods".equals(vec1.toString())) { r = r + 1; } if ("punctua".equals(vec1.toString())) { p = p + 1; } if ("networks".equals(vec1.toString())) { n = n + 1; } PrintWriter out = null; try { out = new PrintWriter(this.file2 + "_morphology.txt"); out.println(vec); out.close(); } catch (Exception ex) { ex.printStackTrace(); } //System.out.println(vec.get(i)); } System.out.println("VECTOR-> punctua: " + p + ", rods: " + r + ", networks: " + n); IJ.showMessage( "Your file:" + 
this.file2 + "arff" + "\nhas been analysed, and it is composed by-> punctua: " + p + ", rods: " + r + ", networks: " + n); txtresults1.setText( "Your file:" + this.file2 + "arff" + "\nhas been analysed, and it is composed by: \npunctua: " + p + ", rods: " + r + ", networks: " + n); } catch (IOException ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(MachinLearningInterface.class.getName()).log(Level.SEVERE, null, ex); } IJ.showMessage("analysing complete "); }
From source file:Pair.java
License:Open Source License
/**
 * Parses the given options.
 *
 * <p>Options handled here before the remaining ones are passed to the superclass:
 * <ul>
 * <li>{@code -S <file>}: source file (required)</li>
 * <li>{@code -F}, {@code -B}, {@code -U}, {@code -M}, {@code -R}, {@code -SS},
 * {@code -FB}: boolean flags stored in {@code doFraction}, {@code doBagging},
 * {@code doUpsource}, {@code useMedian}, {@code resample}, {@code doSampleSize} and
 * {@code fixedBeta}</li>
 * <li>{@code -TT <file>}: ARFF file with the test data (required); its last attribute
 * becomes the class</li>
 * <li>{@code -Ratio <double>}: source ratio (optional)</li>
 * <li>{@code -II <int>}: source iterations (optional; defaults to
 * {@code m_NumIterations} as set by the superclass)</li>
 * </ul>
 *
 * @param options the list of options as an array of strings
 * @throws Exception if a required option is missing, the test file cannot be read, or
 *         an option is not supported
 */
public void setOptions(String[] options) throws Exception {
    String sourceFileName = Utils.getOption('S', options);
    if (sourceFileName.length() == 0) {
        throw new Exception("A filename must be specified with" + " the -S option.");
    }
    setSourceFile(new File(sourceFileName));

    doFraction = Utils.getFlag('F', options);
    doBagging = Utils.getFlag('B', options);
    doUpsource = Utils.getFlag('U', options);
    useMedian = Utils.getFlag('M', options);
    resample = Utils.getFlag('R', options);
    doSampleSize = Utils.getFlag("SS", options);
    fixedBeta = Utils.getFlag("FB", options);

    // The test data is always loaded, so report a missing -TT explicitly instead of
    // failing with a FileNotFoundException for an empty path.
    String testFileName = Utils.getOption("TT", options);
    if (testFileName.length() == 0) {
        throw new Exception("A test data filename must be specified with the -TT option.");
    }
    // Close the reader once the dataset has been parsed (or parsing has failed).
    try (BufferedReader reader = new BufferedReader(new FileReader(testFileName))) {
        testData = new Instances(reader);
    }
    testData.setClassIndex(testData.numAttributes() - 1);

    String value = Utils.getOption("Ratio", options);
    if (!value.equals("")) {
        sourceRatio = Double.parseDouble(value);
    }

    // The superclass must parse its options first: -II falls back to its m_NumIterations.
    super.setOptions(options);

    value = Utils.getOption("II", options);
    if (!value.equals("")) {
        sourceIterations = Integer.parseInt(value);
    } else {
        sourceIterations = m_NumIterations;
    }
}