List of usage examples for weka.classifiers.evaluation.output.prediction.PlainText
PlainText
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifier(WekaClassifier wekaClassifier, Dataset trainDataset, Dataset testDataset) throws Exception { Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the train instances and get the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + trainDataset.toString() + ".arff", "-o", MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff" }); Instances train = DataSource.read(MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff"); train.setClassIndex(train.numAttributes() - 1); // Add IDs to the test instances and get the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + testDataset.toString() + ".arff", "-o", MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff" }); Instances test = DataSource.read(MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff"); test.setClassIndex(test.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data test.randomize(random);/*from w ww . j a v a 2 s . 
c o m*/ // Apply log filter // Filter logFilter = new LogFilter(); // logFilter.setInputFormat(train); // train = Filter.useFilter(train, logFilter); // logFilter.setInputFormat(test); // test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Prepare the output buffer AbstractOutput output = new PlainText(); output.setBuffer(new StringBuffer()); output.setHeader(test); output.setAttributes("first"); Evaluation eval = new Evaluation(train); eval.evaluateModel(filteredClassifier, test, output); // Convert predictions to CSV // Format: inst#, actual, predicted, error, probability, (ID) String[] scores = new String[new Double(eval.numInstances()).intValue()]; double[] probabilities = new double[new Double(eval.numInstances()).intValue()]; for (String line : output.getBuffer().toString().split("\n")) { String[] linesplit = line.split("\\s+"); // If there's been an error, the length of linesplit is 6, otherwise 5, // due to the error flag "+" int id; String expectedValue, classification; double probability; if (line.contains("+")) { id = Integer.parseInt(linesplit[6].substring(1, linesplit[6].length() - 1)); expectedValue = linesplit[2].substring(2); classification = linesplit[3].substring(2); probability = Double.parseDouble(linesplit[5]); } else { id = Integer.parseInt(linesplit[5].substring(1, linesplit[5].length() - 1)); expectedValue = linesplit[2].substring(2); classification = linesplit[3].substring(2); probability = Double.parseDouble(linesplit[4]); } scores[id - 1] = classification; probabilities[id - 1] = probability; } System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); // Output 
classifications StringBuilder sb = new StringBuilder(); for (String score : scores) sb.append(score.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".csv"), sb.toString()); // Output probabilities sb = new StringBuilder(); for (Double probability : probabilities) sb.append(probability.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".probabilities.csv"), sb.toString()); // Output predictions FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".predictions.txt"), output.getBuffer().toString()); // Output meta information sb = new StringBuilder(); sb.append(classifier.toString() + LF); sb.append(eval.toSummaryString() + LF); sb.append(eval.toMatrixString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".meta.txt"), sb.toString()); }
From source file:org.scify.NewSumServer.Server.MachineLearning.labelTagging.java
License:Apache License
/** * Find the recommend labels from classifier * * @return the recommend labels//w w w.j a v a 2 s .c om */ public static String recommendation(INSECTDB file, String text) { String labelList = "-none-"; //create IVector String Ivector = vector.labellingVector(text, file); // take the similarity vectors for each class graph try { Instances dataTrainSet = dataSets.trainingSet(file); //take the train dataset Instances dataLabelSet = dataSets.labelingSet(file, Ivector);//take tha labe dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataTrainSet); saver.setFile(new File("./data/dataTrainSet.arff")); saver.writeBatch(); ArffSaver saver2 = new ArffSaver(); saver2.setInstances(dataLabelSet); saver2.setFile(new File("./data/dataLabelSet.arff")); saver2.writeBatch(); File temp = File.createTempFile("exportFile", null); //TODO: creat classifier // String option = "-S 2 -K 2 -D 3 -G 0.0 -R 0.0 -N 0.5 -M 40.0 -C 1.0 -E 0.001 -P 0.1"; // classifier options // String[] options = option.split("\\s+"); if (dataTrainSet.classIndex() == -1) { dataTrainSet.setClassIndex(dataTrainSet.numAttributes() - 1); } // Create a classifier LibSVM // NaiveBayes nb = new NaiveBayes(); // RandomForest nb = new RandomForest(); J48 nb = new J48(); // nb.setOptions(options); nb.buildClassifier(dataTrainSet); // End train method if (dataLabelSet.classIndex() == -1) { dataLabelSet.setClassIndex(dataLabelSet.numAttributes() - 1); } StringBuffer writer = new StringBuffer(); PlainText output = new PlainText(); output.setBuffer(writer); output.setHeader(dataLabelSet); output.printClassifications(nb, dataLabelSet); // PrintStream ps2 = new PrintStream(classGname); // ps2.print(writer.toString()); // ps2.close(); PrintStream ps = new PrintStream(temp); //Add to temp file the results of classifying ps.print(writer.toString()); ps.close(); //TODO: export result // labelList = result(temp); //if result is true adds the current class graph name in label list labelList = result(temp) + " --------->> 
" + text; //if result is true adds the current class graph name in label list Utilities.appendToFile(labelList); } catch (Exception ex) { Logger.getLogger(labelTagging.class.getName()).log(Level.SEVERE, null, ex); } return labelList; }