List of usage examples for weka.core Instances testCV
public Instances testCV(int numFolds, int numFold)
From source file:mulan.evaluation.Evaluator.java
License:Open Source License
private MultipleEvaluation innerCrossValidate(MultiLabelLearner learner, MultiLabelInstances data, boolean hasMeasures, List<Measure> measures, int someFolds) { Evaluation[] evaluation = new Evaluation[someFolds]; Instances workingSet = new Instances(data.getDataSet()); workingSet.randomize(new Random(seed)); for (int i = 0; i < someFolds; i++) { System.out.println("Fold " + (i + 1) + "/" + someFolds); try {//from ww w . ja va 2 s. c o m Instances train = workingSet.trainCV(someFolds, i); Instances test = workingSet.testCV(someFolds, i); MultiLabelInstances mlTrain = new MultiLabelInstances(train, data.getLabelsMetaData()); MultiLabelInstances mlTest = new MultiLabelInstances(test, data.getLabelsMetaData()); MultiLabelLearner clone = learner.makeCopy(); clone.build(mlTrain); if (hasMeasures) evaluation[i] = evaluate(clone, mlTest, measures); else evaluation[i] = evaluate(clone, mlTest); } catch (Exception ex) { Logger.getLogger(Evaluator.class.getName()).log(Level.SEVERE, null, ex); } } return new MultipleEvaluation(evaluation); }
From source file:net.sf.bddbddb.order.WekaInterface.java
License:LGPL
public static double cvError(int numFolds, Instances data0, String cClassName) { if (data0.numInstances() < numFolds) return Double.NaN; //more folds than elements if (numFolds == 0) return Double.NaN; // no folds if (data0.numInstances() == 0) return 0; //no instances Instances data = new Instances(data0); //data.randomize(new Random(System.currentTimeMillis())); data.stratify(numFolds);//from w ww . j av a 2s . c o m Assert._assert(data.classAttribute() != null); double[] estimates = new double[numFolds]; for (int i = 0; i < numFolds; ++i) { Instances trainData = data.trainCV(numFolds, i); Assert._assert(trainData.classAttribute() != null); Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances."); Instances testData = data.testCV(numFolds, i); Assert._assert(testData.classAttribute() != null); Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances."); int temp = FindBestDomainOrder.TRACE; FindBestDomainOrder.TRACE = 0; Classifier classifier = buildClassifier(cClassName, trainData); FindBestDomainOrder.TRACE = temp; int count = testData.numInstances(); double loss = 0; double sum = 0; for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) { Instance instance = (Instance) e.nextElement(); Assert._assert(instance != null); Assert._assert(instance.classAttribute() != null && instance.classAttribute() == trainData.classAttribute()); try { double testClass = classifier.classifyInstance(instance); double weight = instance.weight(); if (testClass != instance.classValue()) loss += weight; sum += weight; } catch (Exception ex) { FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex); } } estimates[i] = 1 - loss / sum; } double average = 0; for (int i = 0; i < numFolds; ++i) average += estimates[i]; return average / numFolds; }
From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierMultiAttribute.java
License:Open Source License
/**
 * Trains and cross-validates one classifier per destination actor, predicting
 * which destination actors each source actor links to, and adds the predicted
 * links back into the graph.
 *
 * @param g the graph providing source/destination actors and ground-truth links
 */
@Override
public void execute(Graph g) {
    Actor[] source = g.getActor((String) parameter[1].getValue());
    if (source != null) {
        // create the attributes for each artist
        FastVector sourceTypes = new FastVector();
        Actor[] dest = g.getActor((String) parameter[3].getValue());
        if (dest != null) {
            // create the Instances set backing this object
            Instances masterSet = null;
            Instance[] trainingData = new Instance[source.length];
            for (int i = 0; i < source.length; ++i) {
                // First, acquire the instance objects for each actor
                Property p = null;
                if ((Boolean) parameter[10].getValue()) {
                    p = source[i].getProperty((String) parameter[2].getValue() + g.getID());
                } else {
                    p = source[i].getProperty((String) parameter[2].getValue());
                }
                if (p != null) {
                    Object[] values = p.getValue();
                    if (values.length > 0) {
                        // NOTE(review): source[i].getID() is added to sourceTypes twice in this
                        // branch (here and below after masterSet.add) — likely a duplicate.
                        sourceTypes.addElement(source[i].getID());
                        trainingData[i] = (Instance) ((Instance) values[0]).copy();
                        // assume that this Instance has a backing dataset
                        // that contains all Instance objects to be tested
                        if (masterSet == null) {
                            masterSet = new Instances(trainingData[i].dataset(), source.length);
                        }
                        masterSet.add(trainingData[i]);
                        sourceTypes.addElement(source[i].getID());
                    } else {
                        trainingData[i] = null;
                        Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                                "Actor " + source[i].getType() + ":" + source[i].getID()
                                        + " does not have an Instance value of property ID " + p.getType());
                    }
                } else {
                    trainingData[i] = null;
                    // NOTE(review): p is null on this branch, so p.getType() below throws NPE;
                    // the property ID from parameter[2] was probably intended.
                    Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                            "Actor " + source[i].getType() + ":" + source[i].getID()
                                    + " does not have a property of ID " + p.getType());
                }
            }
            // One boolean (false/true) class attribute per destination actor.
            Vector<Attribute> destVector = new Vector<Attribute>();
            for (int i = 0; i < dest.length; ++i) {
                FastVector type = new FastVector();
                type.addElement("false");
                type.addElement("true");
                Attribute tmp = new Attribute(dest[i].getID(), type);
                destVector.add(tmp);
                masterSet.insertAttributeAt(tmp, masterSet.numAttributes());
            }
            Attribute sourceID = new Attribute("sourceID", sourceTypes);
            masterSet.insertAttributeAt(sourceID, masterSet.numAttributes());
            // set ground truth for evaluation
            for (int i = 0; i < masterSet.numInstances(); ++i) {
                Instance inst = masterSet.instance(i);
                // NOTE(review): parameter[i] varies with the instance index — almost certainly
                // meant to be a fixed parameter (parameter[2], the source mode); confirm.
                Actor user = g.getActor((String) parameter[i].getValue(),
                        sourceID.value((int) inst.value(sourceID)));
                if (user != null) {
                    for (int j = 0; j < dest.length; ++j) {
                        // NOTE(review): these setValue calls all target the sourceID attribute,
                        // not destVector.get(j) — the per-destination ground truth is never set
                        // and the sourceID value is clobbered; confirm intended attribute.
                        if (g.getLink((String) parameter[4].getValue(), user, dest[j]) != null) {
                            inst.setValue(sourceID, "true");
                        } else {
                            if ((Boolean) parameter[9].getValue()) {
                                inst.setValue(sourceID, "false");
                            } else {
                                inst.setValue(sourceID, Double.NaN);
                            }
                        }
                    }
                } else {
                    Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE,
                            "Actor " + sourceID.value((int) inst.value(sourceID))
                                    + " does not exist in graph");
                }
            }
            // perform cross fold evaluation of each classifier in turn
            // NOTE(review): parameter[9] is read as a Boolean above but as a String here —
            // one of the two indices is likely wrong.
            String[] opts = ((String) parameter[9].getValue()).split("\\s+");
            Properties props = new Properties();
            if ((Boolean) parameter[11].getValue()) {
                props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID());
            } else {
                props.setProperty("LinkType", (String) parameter[5].getValue());
            }
            props.setProperty("LinkClass", "Basic");
            try {
                for (int destCount = 0; destCount < dest.length; ++destCount) {
                    masterSet.setClass(destVector.get(destCount));
                    for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) {
                        Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i);
                        // NOTE(review): train is built with testCV, so the classifier is trained
                        // and tested on the same fold — should be masterSet.trainCV(...).
                        Instances train = masterSet.testCV((Integer) parameter[8].getValue(), i);
                        Classifier classifier = (Classifier) ((Class) parameter[7].getValue()).newInstance();
                        classifier.setOptions(opts);
                        classifier.buildClassifier(train);
                        for (int j = 0; j < test.numInstances(); ++j) {
                            String sourceName = sourceID.value((int) test.instance(j).value(sourceID));
                            double result = classifier.classifyInstance(test.instance(j));
                            String predicted = masterSet.classAttribute().value((int) result);
                            // Materialize the predicted link and add it to the graph.
                            Link derived = LinkFactory.newInstance().create(props);
                            derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0,
                                    g.getActor((String) parameter[3].getValue(), predicted));
                            g.add(derived);
                        }
                    }
                }
            } catch (InstantiationException ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (IllegalAccessException ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (Exception ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else { // dest==null
            Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                    "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors");
        }
    } else { // source==null
        Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                "Source mode '" + (String) parameter[2].getValue() + "' has no actors");
    }
}
From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierOneAttribute.java
License:Open Source License
@Override public void execute(Graph g) { Actor[] source = g.getActor((String) parameter[1].getValue()); if (source != null) { // create the Instance sets for each ac FastVector classTypes = new FastVector(); FastVector sourceTypes = new FastVector(); Actor[] dest = g.getActor((String) parameter[3].getValue()); if (dest != null) { for (int i = 0; i < dest.length; ++i) { classTypes.addElement(dest[i].getID()); }/*from w w w. java 2 s .c o m*/ Attribute classAttribute = new Attribute((String) parameter[5].getValue(), classTypes); Instance[] trainingData = new Instance[source.length]; Instances masterSet = null; for (int i = 0; i < source.length; ++i) { // First, acquire the instance objects for each actor Property p = null; if ((Boolean) parameter[9].getValue()) { p = source[i].getProperty((String) parameter[2].getValue() + g.getID()); } else { p = source[i].getProperty((String) parameter[2].getValue()); } if (p != null) { Object[] values = p.getValue(); if (values.length > 0) { sourceTypes.addElement(source[i].getID()); trainingData[i] = (Instance) ((Instance) values[0]).copy(); // assume that this Instance has a backing dataset // that contains all Instance objects to be tested if (masterSet == null) { masterSet = new Instances(trainingData[i].dataset(), source.length); } masterSet.add(trainingData[i]); } else { trainingData[i] = null; Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Actor " + source[i].getType() + ":" + source[i].getID() + " does not have an Instance value of property ID " + p.getType()); } } else { trainingData[i] = null; Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Actor " + source[i].getType() + ":" + source[i].getID() + " does not have a property of ID " + p.getType()); } } // for every actor, fix the instance Attribute sourceID = new Attribute("sourceID", sourceTypes); masterSet.insertAttributeAt(sourceID, masterSet.numAttributes()); masterSet.insertAttributeAt(classAttribute, 
masterSet.numAttributes()); masterSet.setClass(classAttribute); for (int i = 0; i < source.length; ++i) { if (trainingData[i] != null) { trainingData[i].setValue(sourceID, source[i].getID()); Link[] link = g.getLinkBySource((String) parameter[4].getValue(), source[i]); if (link == null) { trainingData[i].setClassValue(Double.NaN); } else { trainingData[i].setClassValue(link[0].getDestination().getID()); } } } String[] opts = ((String) parameter[7].getValue()).split("\\s+"); Properties props = new Properties(); if ((Boolean) parameter[10].getValue()) { props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID()); } else { props.setProperty("LinkType", (String) parameter[5].getValue()); } props.setProperty("LinkClass", "Basic"); try { for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) { Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i); Instances train = masterSet.testCV((Integer) parameter[8].getValue(), i); Classifier classifier = (Classifier) ((Class) parameter[6].getValue()).newInstance(); classifier.setOptions(opts); classifier.buildClassifier(train); for (int j = 0; j < test.numInstances(); ++j) { String sourceName = sourceID.value((int) test.instance(j).value(sourceID)); double result = classifier.classifyInstance(test.instance(j)); String predicted = masterSet.classAttribute().value((int) result); Link derived = LinkFactory.newInstance().create(props); derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0, g.getActor((String) parameter[3].getValue(), predicted)); g.add(derived); } } } catch (InstantiationException ex) { Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex); } catch (IllegalAccessException ex) { Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex); } } else { // dest==null 
Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors"); } } else { // source==null Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Source mode '" + (String) parameter[2].getValue() + "' has no actors"); } }
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * Clamps the K value, copies the data (dropping missing-class rows), falls
 * back to ZeroR when only the class attribute remains, optionally splits off
 * a backfit set via cross-validation folds, then builds the tree from the
 * configured JSON specification.
 *
 * @param data
 *            the data to train with
 * @throws Exception
 *             if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Make sure K value is in range
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;
    // can classifier handle the data?
    getCapabilities().testWithFail(data);
    // remove instances with missing class (copy first so the caller's data is untouched)
    data = new Instances(data);
    data.deleteWithMissingClass();
    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }
    // Figure out appropriate datasets
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        // NOTE(review): the fold index is hard-coded to 1, so the same single
        // train/backfit split is always used (no iteration over folds) — confirm intended.
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }
    // Set Default Instances for selection.
    setRequiredInst(data);
    // Create the attribute indices window
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }
    // Compute initial class counts (instance weights summed per class)
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }
    Instances requiredInstances = getRequiredInst();
    // Build tree from the JSON specification; without one, nothing is built.
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0,
                m_distributionData, requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);
    // Backfit if required
    if (backfit != null) {
        backfitData(backfit);
    }
}
From source file:semana07.IrisKnn.java
public static void main(String[] args) throws FileNotFoundException, IOException, Exception { // DEFININDO CONJUNTO DE TREINAMENTO // - Definindo o leitor do arquivo arff FileReader baseIris = new FileReader("iris.arff"); // - Definindo o grupo de instancias a partir do arquivo "simpsons.arff" Instances iris = new Instances(baseIris); // - Definindo o indice do atributo classe iris.setClassIndex(4);// w w w . j av a 2s. c o m iris = iris.resample(new Debug.Random()); Instances irisTreino = iris.trainCV(3, 0); Instances irisTeste = iris.testCV(3, 0); // DEFININDO EXEMPLO DESCONHECIDO //5.9,3.0,5.1,1.8,Iris-virginica Instance irisInst = new DenseInstance(iris.numAttributes()); irisInst.setDataset(iris); irisInst.setValue(0, 5.9); irisInst.setValue(1, 3.0); irisInst.setValue(2, 5.1); irisInst.setValue(3, 1.8); // DEFININDO ALGORITMO DE CLASSIFICAO //NN IBk vizinhoIris = new IBk(); //kNN IBk knnIris = new IBk(3); // MONTANDO CLASSIFICADOR //NN vizinhoIris.buildClassifier(irisTreino); //kNN knnIris.buildClassifier(irisTreino); // Definindo arquivo a ser escrito FileWriter writer = new FileWriter("iris.csv"); // Escrevendo o cabealho do arquivo writer.append("Classe Real;Resultado NN;Resultado kNN"); writer.append(System.lineSeparator()); // Sada CLI / Console System.out.println("Classe Real;Resultado NN;Resultado kNN"); //Cabealho for (int i = 0; i <= irisTeste.numInstances() - 1; i++) { Instance testeIris = irisTeste.instance(i); // Sada CLI / Console do valor original System.out.print(testeIris.stringValue(4) + ";"); // Escrevendo o valor original no arquivo writer.append(testeIris.stringValue(4) + ";"); // Definindo o atributo classe como indefinido testeIris.setClassMissing(); // CLASSIFICANDO A INSTANCIA // NN double respostaVizinho = vizinhoIris.classifyInstance(testeIris); testeIris.setValue(4, respostaVizinho); String stringVizinho = testeIris.stringValue(4); //kNN double respostaKnn = knnIris.classifyInstance(testeIris); // Atribuindo respota ao valor do 
atributo do index 4(classe) testeIris.setValue(4, respostaKnn); String stringKnn = testeIris.stringValue(4); // Adicionando resultado ao grupo de instancia iris iris.add(irisInst); //Escrevendo os resultados no arquivo iris.csv writer.append(stringVizinho + ";"); writer.append(stringKnn + ";"); writer.append(System.lineSeparator()); // Exibindo via CLI / Console o resultado System.out.print(respostaVizinho + ";"); System.out.print(respostaKnn + ";"); System.out.println(testeIris.stringValue(4)); } writer.flush(); writer.close(); }
From source file:sentinets.Prediction.java
License:Open Source License
/**
 * Incrementally updates the stored SGD classifier with newly labeled data and
 * reports evaluation metrics before, during, and after the update.
 *
 * Note: this is not a conventional cross-validation — the same SGD model and
 * Evaluation object are carried across folds, so each fold's update accumulates
 * on top of the previous ones; afterwards the model is updated once more on the
 * full filtered set and serialized to disk.
 *
 * @param inputFile path handed to setInstances() to load the new (unlabeled) data
 * @param metrics   out-parameter; one {fMeasure(0), fMeasure(1), weightedFMeasure}
 *                  triple is appended per evaluation performed
 * @return a human-readable report of all evaluations and the saved model path
 */
public String updateModel(String inputFile, ArrayList<Double[]> metrics) {
    String output = "";
    this.setInstances(inputFile);
    FilteredClassifier fcls = (FilteredClassifier) this.cls;
    // Unwrap the SGD base learner and its filter so updates bypass re-filtering.
    SGD cls = (SGD) fcls.getClassifier();
    Filter filter = fcls.getFilter();
    Instances insAll;
    try {
        insAll = Filter.useFilter(this.unlabled, filter);
        if (insAll.size() > 0) {
            Random rand = new Random(10);
            // Fall back to 2 folds when there are fewer than 10 instances.
            int folds = 10 > insAll.size() ? 2 : 10;
            Instances randData = new Instances(insAll);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal()) {
                randData.stratify(folds);
            }
            Evaluation eval = new Evaluation(randData);
            // Baseline: evaluate the current model before any update.
            eval.evaluateModel(cls, insAll);
            System.out.println("Initial Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            output += "\n====" + "Initial Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            System.out.println("Cross Validated Evaluation");
            output += "\n====" + "Cross Validated Evaluation" + "====\n";
            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);
                // Online update on the training fold; the model state carries over
                // to subsequent folds (cumulative, not independent folds).
                for (int i = 0; i < train.numInstances(); i++) {
                    cls.updateClassifier(train.instance(i));
                }
                eval.evaluateModel(cls, test);
                System.out.println("Cross Validated Evaluation fold: " + n);
                output += "\n====" + "Cross Validated Evaluation fold (" + n + ")====\n";
                System.out.println(eval.toSummaryString());
                System.out.println(eval.toClassDetailsString());
                output += "\n" + eval.toSummaryString();
                output += "\n" + eval.toClassDetailsString();
                metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            }
            // Final pass: update on every instance, then evaluate on the same set.
            for (int i = 0; i < insAll.numInstances(); i++) {
                cls.updateClassifier(insAll.instance(i));
            }
            eval.evaluateModel(cls, insAll);
            System.out.println("Final Evaluation");
            System.out.println(eval.toSummaryString());
            System.out.println(eval.toClassDetailsString());
            output += "\n====" + "Final Evaluation" + "====\n";
            output += "\n" + eval.toSummaryString();
            output += "\n" + eval.toClassDetailsString();
            metrics.add(new Double[] { eval.fMeasure(0), eval.fMeasure(1), eval.weightedFMeasure() });
            // Persist the updated model.
            fcls.setClassifier(cls);
            String modelFilePath = outputDir + "/" + Utils.getOutDir(Utils.OutDirIndex.MODELS)
                    + "/updatedClassifier.model";
            weka.core.SerializationHelper.write(modelFilePath, fcls);
            output += "\n" + "Updated Model saved at: " + modelFilePath;
        } else {
            output += "No new instances for training the model.";
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return output;
}
From source file:sirius.clustering.main.ClustererClassificationPane.java
License:Open Source License
/**
 * Validates the UI inputs, then launches a background thread that
 * cross-validates the chosen classifier on each clusterer-produced ARFF file
 * (one per cluster), writes per-fold scores to .score files, and populates the
 * results table with per-cluster and summary statistics.
 *
 * NOTE(review): the worker thread mutates Swing components (statusLabel,
 * buttons, table selection) directly instead of via the EDT — confirm whether
 * this is accepted practice elsewhere in this code base.
 */
private void start() {
    // Run Classifier
    if (this.inputDirectoryTextField.getText().length() == 0) {
        JOptionPane.showMessageDialog(parent, "Please set Input Directory to where the clusterer output are!",
                "Evaluate Classifier", JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (m_ClassifierEditor.getValue() == null) {
        JOptionPane.showMessageDialog(parent, "Please choose Classifier!", "Evaluate Classifier",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (validateStatsSettings(1) == false) {
        return;
    }
    // Only one job may run at a time; a non-null thread means one is in flight.
    if (this.clusteringClassificationThread == null) {
        startButton.setEnabled(false);
        stopButton.setEnabled(true);
        tabbedClassifierPane.setSelectedIndex(0);
        this.clusteringClassificationThread = (new Thread() {
            public void run() {
                // Clear the output text area
                levelOneClassifierOutputTextArea.setText("");
                resultsTableModel.reset();
                //double threshold = Double.parseDouble(classifierOneThresholdTextField.getText());
                // cross-validation: -1 acts as a jack-knife sentinel, resolved per file below
                int numFolds;
                if (jackKnifeRadioButton.isSelected())
                    numFolds = -1;
                else
                    numFolds = Integer.parseInt(foldsField.getText());
                // Take the last path component as the file name.
                StringTokenizer st = new StringTokenizer(inputDirectoryTextField.getText(), File.separator);
                String filename = "";
                while (st.hasMoreTokens()) {
                    filename = st.nextToken();
                }
                // Parse the cluster count out of a name like "xxx_clusterN.arff".
                StringTokenizer st2 = new StringTokenizer(filename, "_.");
                numOfCluster = 0;
                if (st2.countTokens() >= 2) {
                    st2.nextToken();
                    String numOfClusterString = st2.nextToken().replaceAll("cluster", "");
                    try {
                        numOfCluster = Integer.parseInt(numOfClusterString);
                    } catch (NumberFormatException e) {
                        JOptionPane.showMessageDialog(parent,
                                "Please choose the correct file! (Output from Utilize Clusterer)", "ERROR",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
                Classifier template = (Classifier) m_ClassifierEditor.getValue();
                for (int x = 0; x <= numOfCluster && clusteringClassificationThread != null; x++) { // Test each cluster
                    try {
                        long totalTimeStart = 0, totalTimeElapsed = 0;
                        totalTimeStart = System.currentTimeMillis();
                        statusLabel.setText("Reading in cluster" + x + " file..");
                        String inputFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".arff");
                        String outputScoreFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        BufferedWriter output = new BufferedWriter(new FileWriter(outputScoreFilename));
                        // NOTE(review): the FileReader below is never closed — leaks a handle per cluster.
                        Instances inst = new Instances(new FileReader(inputFilename));
                        // Assume that class attribute is the last attribute - This should be the case for all Sirius produced Arff files
                        inst.setClassIndex(inst.numAttributes() - 1);
                        Random random = new Random(1); // Simply set to 1, shall implement the random seed option later
                        inst.randomize(random);
                        if (inst.attribute(inst.classIndex()).isNominal())
                            inst.stratify(numFolds);
                        // for timing
                        ClassifierResults classifierResults = new ClassifierResults(false, 0);
                        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
                        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ",
                                classifierName);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                                inputFilename);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ", "NA");
                        //ArrayList<Double> resultList = new ArrayList<Double>();
                        // Jack-knife (or oversized fold count) -> leave-one-out.
                        if (jackKnifeRadioButton.isSelected() || numFolds > inst.numInstances() - 1)
                            numFolds = inst.numInstances() - 1;
                        for (int fold = 0; fold < numFolds && clusteringClassificationThread != null; fold++) { // Doing cross-validation
                            statusLabel.setText("Cluster: " + x + " - Training Fold " + (fold + 1) + "..");
                            Instances train = inst.trainCV(numFolds, fold, random);
                            Classifier current = null;
                            try {
                                // Fresh copy per fold so folds stay independent.
                                current = Classifier.makeCopy(template);
                                current.buildClassifier(train);
                                Instances test = inst.testCV(numFolds, fold);
                                statusLabel.setText("Cluster: " + x + " - Testing Fold " + (fold + 1) + "..");
                                for (int jj = 0; jj < test.numInstances(); jj++) {
                                    double[] result = current.distributionForInstance(test.instance(jj));
                                    output.write("Cluster: " + x);
                                    output.newLine();
                                    output.newLine();
                                    output.write(test.instance(jj).stringValue(test.classAttribute()) + ",0="
                                            + result[0]);
                                    output.newLine();
                                }
                            } catch (Exception ex) {
                                ex.printStackTrace();
                                statusLabel.setText("Error in cross-validation!");
                                startButton.setEnabled(true);
                                stopButton.setEnabled(false);
                            }
                        }
                        output.close();
                        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
                        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2)
                                        + " seconds");
                        double threshold = validateFieldAsThreshold(classifierOneThresholdTextField.getText(),
                                "Threshold Field", classifierOneThresholdTextField);
                        // Re-read the scores just written and add a results-table row for this cluster.
                        String filename2 = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        PredictionStats classifierStats = new PredictionStats(filename2, 0, threshold);
                        resultsTableModel.add("Cluster " + x, classifierResults, classifierStats);
                        resultsTable.setRowSelectionInterval(x, x);
                        computeStats(numFolds); // compute and display the results
                    } catch (Exception e) {
                        e.printStackTrace();
                        statusLabel.setText("Error in reading file!");
                        startButton.setEnabled(true);
                        stopButton.setEnabled(false);
                    }
                } // end of cluster for loop
                // Two summary rows follow the per-cluster rows.
                resultsTableModel.add("Summary - Equal Weightage", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 1, numOfCluster + 1);
                computeStats(numFolds);
                resultsTableModel.add("Summary - Weighted Average", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 2, numOfCluster + 2);
                computeStats(numFolds);
                if (clusteringClassificationThread != null)
                    statusLabel.setText("Done!");
                else
                    statusLabel.setText("Interrupted..");
                startButton.setEnabled(true);
                stopButton.setEnabled(false);
                if (classifierOne != null) {
                    levelOneClassifierOutputScrollPane.getVerticalScrollBar()
                            .setValue(levelOneClassifierOutputScrollPane.getVerticalScrollBar().getMaximum());
                }
                // Mark the job slot free again.
                clusteringClassificationThread = null;
            }
        });
        this.clusteringClassificationThread.setPriority(Thread.MIN_PRIORITY);
        this.clusteringClassificationThread.start();
    } else {
        JOptionPane.showMessageDialog(parent,
                "Cannot start new job as previous job still running. Click stop to terminate previous job",
                "ERROR", JOptionPane.ERROR_MESSAGE);
    }
}
From source file:src.BigDataClassifier.GenerateFolds.java
public void generateFolds(Instances trainDataset) throws Exception { //randomize data Random rand = new Random(1); //set folds/*from w w w.j a v a 2s .c o m*/ int folds = 3; //create random dataset Instances randData = new Instances(trainDataset); randData.randomize(rand); Instances[] result = new Instances[folds * 2]; //cross-validate for (int n = 0; n < folds; n++) { trainDataset = randData.trainCV(folds, n); System.out.println("Train dataset size is = " + trainDataset.size()); Instances testDataset = randData.testCV(folds, n); System.out.println("Test dataset size is = " + testDataset.size()); result[n] = trainDataset; result[n + 1] = testDataset; trainDataset2 = trainDataset; testDataset2 = testDataset; } trainDatasetSize = trainDataset2.size(); testDatasetSize = testDataset2.size(); }
From source file:tubes.ml.pkg1.TubesML1.java
/**
 * Runs ID3 (on discretized data) and J48 over three ARFF datasets
 * (iris, weather, weather.nominal), cross-validating each and writing the
 * models and per-fold summaries to "model.txt".
 *
 * @throws Exception if reading a dataset, filtering, or evaluation fails
 */
public void akses() throws Exception {
    Discretize filter;
    int fold = 10;
    int fold3 = 3;
    int trainNum, testNum;
    PrintWriter file = new PrintWriter("model.txt");
    /*** dataset 1 ***/
    file.println("***DATASET 1***");
    fileReader tets = new fileReader("./src/data/iris.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    Instances data = tets.getData();
    filter = new Discretize();
    try {
        filter.setInputFormat(data);
    } catch (Exception ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    /* ID3 */
    Instances discreteData;
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;
    for (int i = 0; i < fold; i++) {
        try {
            Instances train = discreteData.trainCV(fold, i);
            Instances test = discreteData.testCV(fold, i);
            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    /* J48 */
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;
    J48 jKT = new J48();
    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                // NOTE(review): J48 is trained on the FULL dataset inside the CV loop
                // (should be buildClassifier(train)) — the evaluation leaks test data.
                jKT.buildClassifier(data);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
            // System.out.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    /* dataset 2 */
    file.println("***DATASET 2***");
    tets.setFilepath("./src/data/weather.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    data = new Instances(tets.getData());
    /* ID3 */
    // NOTE(review): the Discretize filter was fitted on dataset 1 (iris) and is
    // reused here without calling setInputFormat on the new data — confirm Weka
    // accepts this for the weather dataset.
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;
    for (int i = 0; i < fold3; i++) {
        try {
            // NOTE(review): trainCV/testCV expect a FOLD COUNT as the first argument,
            // but instance counts (trainNum/testNum) are passed here — likely meant
            // to be trainCV(fold3, i) / testCV(fold3, i) as in the other loops.
            Instances train = discreteData.trainCV(trainNum, i);
            Instances test = discreteData.testCV(testNum, i);
            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                //file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println("Validation " + (i + 1));
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println(testNum);
    file.println("Test Number");
    file.println(testNum);
    /* J48 */
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;
    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                // NOTE(review): same full-data training issue as above.
                jKT.buildClassifier(data);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
            System.out.println(jKT.toString());
            file.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    /* dataset 3 */
    file.println("***DATASET 3***");
    tets.setFilepath("./src/data/weather.nominal.arff");
    try {
        tets.read();
    } catch (IOException ex) {
        Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
    }
    data = new Instances(tets.getData());
    /* ID3 */
    // NOTE(review): filter fitted on dataset 1 reused again — see note above.
    discreteData = Filter.useFilter(data, filter);
    trainNum = discreteData.numInstances() * 3 / 4;
    testNum = discreteData.numInstances() / 4;
    for (int i = 0; i < fold3; i++) {
        try {
            // NOTE(review): loops fold3 (3) times but splits into `fold` (10) folds;
            // folds 3..9 are never evaluated — confirm intended.
            Instances train = discreteData.trainCV(fold, i);
            Instances test = discreteData.testCV(fold, i);
            Id3 iTiga = new Id3();
            Evaluation validation = new Evaluation(train);
            try {
                iTiga.buildClassifier(train);
                System.out.println(iTiga.toString());
                file.println(iTiga.toString());
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(iTiga, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println(testNum);
    file.println("Test Number");
    file.println(testNum);
    /* J48 */
    trainNum = data.numInstances() * 3 / 4;
    testNum = data.numInstances() / 4;
    for (int i = 0; i < fold; i++) {
        Instances train = data.trainCV(fold, i);
        Instances test = data.testCV(fold, i);
        try {
            Evaluation validation = new Evaluation(train);
            try {
                // NOTE(review): same full-data training issue as above.
                jKT.buildClassifier(data);
            } catch (Exception ex) {
                Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
            }
            validation.evaluateModel(jKT, test);
            System.out.println(validation.toSummaryString());
            file.println(validation.toSummaryString());
            System.out.println(jKT.toString());
            file.println(jKT.toString());
        } catch (Exception ex) {
            Logger.getLogger(TubesML1.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    /* RESULT */
    System.out.println(jKT.toString());
    file.println("RESULT");
    file.println(jKT.toString());
    file.close();
}