List of usage examples for weka.filters.unsupervised.instance.RemovePercentage
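Common to all of the examples below is the same three-step pattern: randomize the data, run RemovePercentage once to obtain one partition, then invert the selection and run it again to obtain the complement. The following is a minimal, self-contained sketch of that pattern; the file path, the 30% figure, and the class name RemovePercentageSplit are illustrative placeholders, not taken from any of the projects below.

import java.util.Random;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.instance.RemovePercentage;

public class RemovePercentageSplit {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the path is a placeholder for any ARFF/CSV file.
        Instances data = new DataSource("data/iris.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);
        data.randomize(new Random(1)); // shuffle: the filter splits by position

        // RemovePercentage drops the first P% of instances, so with P = 30
        // the plain pass keeps the remaining 70% for training.
        RemovePercentage split = new RemovePercentage();
        split.setPercentage(30);
        split.setInputFormat(data);
        Instances train = Filter.useFilter(data, split);

        // Inverting the selection keeps only the removed 30% as the test
        // set. setInputFormat must be called again: it resets the filter.
        split.setInvertSelection(true);
        split.setInputFormat(data);
        Instances test = Filter.useFilter(data, split);

        System.out.println("train=" + train.numInstances() + ", test=" + test.numInstances());
    }
}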
From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java
public void trainModel() {
    Instances trainingData = loadTrainingData();
    System.out.println("Class attribute: " + trainingData.classAttribute().toString());

    // Partition the dataset into training and test sets. RemovePercentage
    // drops the first 10% of instances, so the plain pass keeps 90% for
    // training; the inverted pass keeps only the removed 10% as test data.
    RemovePercentage filter = new RemovePercentage();
    filter.setPercentage(10);
    Instances train = null;
    Instances testData = null;
    try {
        filter.setInputFormat(trainingData);
        train = Filter.useFilter(trainingData, filter);
        // setInputFormat resets the filter, so call it again before the
        // inverted pass over the same data
        filter.setInvertSelection(true);
        filter.setInputFormat(trainingData);
        testData = Filter.useFilter(trainingData, filter);
    } catch (Exception ex) {
        System.out.println("Error splitting data: " + ex.toString());
    }

    // Train the classifier on the training partition only
    Classifier model = (Classifier) new NaiveBayes();
    try {
        model.buildClassifier(train);
    } catch (Exception ex) {
        System.out.println("Error training model: " + ex.toString());
    }

    // Serialize the trained model to file
    try {
        weka.core.SerializationHelper.write(modelDir + algorithm + ".model", model);
    } catch (Exception ex) {
        Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
    }

    // Set the local model
    this.trainedModel = model;

    // Evaluate the model on the held-out test set
    try {
        Evaluation test = new Evaluation(train);
        test.evaluateModel(model, testData);
        System.out.println(test.toSummaryString());
    } catch (Exception ex) {
        System.out.println("Error evaluating model: " + ex.toString());
    }
}
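One caveat with this example: RemovePercentage selects by position (it removes a leading block of instances in order), so if loadTrainingData() returns instances in any systematic order, such as grouped by class, the held-out 10% will not be representative. The other examples on this page call Instances.randomize(...) before filtering for exactly that reason.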
From source file:experimentalclassifier.ExperimentalClassifier.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data/iris.csv");
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    // Unseeded Random: the shuffle (and so the split) differs on every run
    data.randomize(new Random());

    // "-P 30" removes the first 30% of instances, leaving 70% for training
    String[] options = weka.core.Utils.splitOptions("-P 30");
    RemovePercentage remove = new RemovePercentage();
    remove.setOptions(options);
    remove.setInputFormat(data);
    Instances train = Filter.useFilter(data, remove);

    // The inverted pass keeps the removed 30% as the test set
    remove.setInvertSelection(true);
    remove.setInputFormat(data);
    Instances test = Filter.useFilter(data, remove);

    Classifier classifier = new HardCodedClassifier();
    classifier.buildClassifier(train); // Currently, this does nothing

    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(classifier, test);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}
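A small point of style in this example: the option string "-P 30" parsed with Utils.splitOptions is equivalent to calling remove.setPercentage(30) directly. Note also that new Random() is unseeded, so the shuffle, and therefore the split and the reported results, differ on every run; the other examples on this page pass a fixed seed when reproducibility matters.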
From source file:expshell.ExpShell.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    String file = "C:\\Users\\YH Jonathan Kwok\\Documents\\NetBeansProjects\\ExpShell\\src\\expshell\\iris.csv";
    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);

    // Randomize it
    data.randomize(new Random(1));

    // Removing 70% leaves 30% for training; the inverted pass yields the
    // removed 70% as the test set
    RemovePercentage rp = new RemovePercentage();
    rp.setPercentage(70);
    rp.setInputFormat(data);
    Instances training = Filter.useFilter(data, rp);
    rp.setInvertSelection(true);
    rp.setInputFormat(data);
    Instances test = Filter.useFilter(data, rp);

    // Standardize the data, filtering the training batch first so the
    // statistics come from the training set only
    Standardize filter = new Standardize();
    filter.setInputFormat(training);
    Instances newTraining = Filter.useFilter(training, filter);
    Instances newTest = Filter.useFilter(test, filter);

    // Part 5 - Now it's a neural-network classifier
    Classifier knn = new NeuralClassifier();
    knn.buildClassifier(newTraining);
    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(knn, newTest);
    System.out.println(eval.toSummaryString("***** Overall results: *****", false));
}
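Standardize is a batch filter: after setInputFormat, the first batch pushed through (here the training split) fixes the attribute means and standard deviations, and every subsequent batch is transformed with those same statistics. Filtering the training set before the test set therefore scales the test data with training-set statistics and keeps test information out of the preprocessing step.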
From source file:irisdata.IrisData.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    String file = "/Users/paul/Desktop/BYU-Idaho/Spring2015/CS450/iris.csv";
    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    data.randomize(new Random(1));

    // Set training set to 70% (removing 30% keeps the remaining 70%)
    RemovePercentage remove = new RemovePercentage();
    remove.setPercentage(30);
    remove.setInputFormat(data);
    Instances trainingSet = Filter.useFilter(data, remove);

    // Set the rest for the testing set; setInputFormat must be called
    // again here, otherwise the second batch passes through unfiltered
    remove.setInvertSelection(true);
    remove.setInputFormat(data);
    Instances testSet = Filter.useFilter(data, remove);

    // Train classifier - kind of
    HardCodedClassifier classifier = new HardCodedClassifier();
    classifier.buildClassifier(trainingSet); // this does nothing right now

    // Evaluate classifier
    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(classifier, testSet);
    //eval.crossValidateModel(classifier, data, 10, new Random(1));

    // Print some statistics
    System.out.println("Results: " + eval.toSummaryString());
}
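The commented-out crossValidateModel line shows the alternative to a manual split: 10-fold cross-validation on the full dataset, which makes the RemovePercentage step unnecessary at the cost of building the classifier once per fold.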
From source file:mulan.classifier.transformation.EnsembleOfClassifierChains.java
License:Open Source License
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    Instances dataSet = new Instances(trainingSet.getDataSet());

    for (int i = 0; i < numOfModels; i++) {
        debug("ECC Building Model:" + (i + 1) + "/" + numOfModels);
        // 2013.12.13
        System.out.println("ECC Building Model:" + (i + 1) + "/" + numOfModels);

        Instances sampledDataSet = null;
        dataSet.randomize(rand);
        if (useSamplingWithReplacement) {
            int bagSize = dataSet.numInstances() * BagSizePercent / 100;
            // create the in-bag dataset
            sampledDataSet = dataSet.resampleWithWeights(new Random(1));
            if (bagSize < dataSet.numInstances()) {
                sampledDataSet = new Instances(sampledDataSet, 0, bagSize);
            }
        } else {
            // keep samplingPercentage% of the freshly randomized data
            RemovePercentage rmvp = new RemovePercentage();
            rmvp.setInvertSelection(true);
            rmvp.setPercentage(samplingPercentage);
            rmvp.setInputFormat(dataSet);
            sampledDataSet = Filter.useFilter(dataSet, rmvp);
        }
        MultiLabelInstances train = new MultiLabelInstances(sampledDataSet, trainingSet.getLabelsMetaData());

        // Build a random chain ordering of the labels via an in-place shuffle
        int[] chain = new int[numLabels];
        for (int j = 0; j < numLabels; j++) {
            chain[j] = j;
        }
        for (int j = 0; j < chain.length; j++) {
            int randomPosition = rand.nextInt(chain.length);
            int temp = chain[j];
            chain[j] = chain[randomPosition];
            chain[randomPosition] = temp;
        }
        debug(Arrays.toString(chain));
        System.out.println(Arrays.toString(chain));
        // Maybe we should check not to produce the same vector for the
        // indices, but the paper does not mention such a check - it simply
        // says a random chain ordering of L

        ensemble[i] = new ClassifierChain(baseClassifier, chain);
        ensemble[i].build(train);
    }
}
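The two sampling branches mirror the two options of the Ensemble of Classifier Chains method: with replacement, a bootstrap bag is drawn via resampleWithWeights and truncated to BagSizePercent of the original size; without replacement, RemovePercentage with the selection inverted keeps samplingPercentage percent of the freshly randomized dataset for each ensemble member. Each member then trains a ClassifierChain with its own randomly shuffled label ordering.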
From source file:mulan.examples.TrainTestExperiment.java
License:Open Source License
public static void main(String[] args) {
    String[] methodsToCompare = { "HOMER", "BR", "CLR", "MLkNN", "MC-Copy", "IncludeLabels", "MC-Ignore",
            "RAkEL", "LP", "MLStacking" };
    try {
        String path = Utils.getOption("path", args); // e.g. -path dataset/
        String filestem = Utils.getOption("filestem", args); // e.g. -filestem emotions
        String percentage = Utils.getOption("percentage", args); // e.g. -percentage 50 (for 50%)

        System.out.println("Loading the dataset");
        MultiLabelInstances mlDataSet = new MultiLabelInstances(path + filestem + ".arff",
                path + filestem + ".xml");

        // Split the dataset into train and test sets
        Instances dataSet = mlDataSet.getDataSet();
        //dataSet.randomize(new Random(1));
        RemovePercentage rmvp = new RemovePercentage();
        rmvp.setInvertSelection(true);
        rmvp.setPercentage(Double.parseDouble(percentage));
        rmvp.setInputFormat(dataSet);
        Instances trainDataSet = Filter.useFilter(dataSet, rmvp);

        rmvp = new RemovePercentage();
        rmvp.setPercentage(Double.parseDouble(percentage));
        rmvp.setInputFormat(dataSet);
        Instances testDataSet = Filter.useFilter(dataSet, rmvp);

        MultiLabelInstances train = new MultiLabelInstances(trainDataSet, path + filestem + ".xml");
        MultiLabelInstances test = new MultiLabelInstances(testDataSet, path + filestem + ".xml");

        Evaluator eval = new Evaluator();
        Evaluation results;

        for (int i = 0; i < methodsToCompare.length; i++) {
            if (methodsToCompare[i].equals("BR")) {
                System.out.println(methodsToCompare[i]);
                Classifier brClassifier = new NaiveBayes();
                BinaryRelevance br = new BinaryRelevance(brClassifier);
                br.setDebug(true);
                br.build(train);
                results = eval.evaluate(br, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("LP")) {
                System.out.println(methodsToCompare[i]);
                Classifier lpBaseClassifier = new J48();
                LabelPowerset lp = new LabelPowerset(lpBaseClassifier);
                lp.setDebug(true);
                lp.build(train);
                results = eval.evaluate(lp, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("CLR")) {
                System.out.println(methodsToCompare[i]);
                Classifier clrClassifier = new J48();
                CalibratedLabelRanking clr = new CalibratedLabelRanking(clrClassifier);
                clr.setDebug(true);
                clr.build(train);
                results = eval.evaluate(clr, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("RAkEL")) {
                System.out.println(methodsToCompare[i]);
                MultiLabelLearner lp = new LabelPowerset(new J48());
                RAkEL rakel = new RAkEL(lp);
                rakel.setDebug(true);
                rakel.build(train);
                results = eval.evaluate(rakel, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("MC-Copy")) {
                System.out.println(methodsToCompare[i]);
                Classifier mclClassifier = new J48();
                MultiClassTransformation mcTrans = new Copy();
                MultiClassLearner mcl = new MultiClassLearner(mclClassifier, mcTrans);
                mcl.setDebug(true);
                mcl.build(train);
                results = eval.evaluate(mcl, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("MC-Ignore")) {
                System.out.println(methodsToCompare[i]);
                Classifier mclClassifier = new J48();
                MultiClassTransformation mcTrans = new Ignore();
                MultiClassLearner mcl = new MultiClassLearner(mclClassifier, mcTrans);
                mcl.build(train);
                results = eval.evaluate(mcl, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("IncludeLabels")) {
                System.out.println(methodsToCompare[i]);
                Classifier ilClassifier = new J48();
                IncludeLabelsClassifier il = new IncludeLabelsClassifier(ilClassifier);
                il.setDebug(true);
                il.build(train);
                results = eval.evaluate(il, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("MLkNN")) {
                System.out.println(methodsToCompare[i]);
                int numOfNeighbors = 10;
                double smooth = 1.0;
                MLkNN mlknn = new MLkNN(numOfNeighbors, smooth);
                mlknn.setDebug(true);
                mlknn.build(train);
                results = eval.evaluate(mlknn, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("HMC")) {
                System.out.println(methodsToCompare[i]);
                Classifier baseClassifier = new J48();
                LabelPowerset lp = new LabelPowerset(baseClassifier);
                RAkEL rakel = new RAkEL(lp);
                HMC hmc = new HMC(rakel);
                hmc.build(train);
                results = eval.evaluate(hmc, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("HOMER")) {
                System.out.println(methodsToCompare[i]);
                Classifier baseClassifier = new SMO();
                CalibratedLabelRanking learner = new CalibratedLabelRanking(baseClassifier);
                learner.setDebug(true);
                HOMER homer = new HOMER(learner, 3, HierarchyBuilder.Method.Random);
                homer.setDebug(true);
                homer.build(train);
                results = eval.evaluate(homer, test);
                System.out.println(results);
            }
            if (methodsToCompare[i].equals("MLStacking")) {
                System.out.println(methodsToCompare[i]);
                int numOfNeighbors = 10;
                Classifier baseClassifier = new IBk(numOfNeighbors);
                Classifier metaClassifier = new Logistic();
                MultiLabelStacking mls = new MultiLabelStacking(baseClassifier, metaClassifier);
                mls.setMetaPercentage(1.0);
                mls.setDebug(true);
                mls.build(train);
                results = eval.evaluate(mls, test);
                System.out.println(results);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
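Note how the two RemovePercentage passes complement each other here: the first, with the selection inverted, keeps the leading percentage% of instances for training, while the second, plain pass removes that same leading block and keeps the remainder for testing. Because both filters are initialized with setInputFormat on the same unfiltered dataSet (and the randomize call is commented out), the two outputs partition the original instance order exactly.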
From source file:neuralnetwork.NeuralNetwork.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    ConverterUtils.DataSource source;
    source = new ConverterUtils.DataSource("C:\\Users\\Harvey\\Documents\\iris.csv");
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    data.randomize(new Debug.Random(1));

    // Removing 70% keeps the remaining 30% as the training split; the
    // inverted pass yields the removed 70% as the test set
    RemovePercentage trainFilter = new RemovePercentage();
    trainFilter.setPercentage(70);
    trainFilter.setInputFormat(data);
    Instances train = Filter.useFilter(data, trainFilter);
    trainFilter.setInvertSelection(true);
    trainFilter.setInputFormat(data);
    Instances test = Filter.useFilter(data, trainFilter);

    // Standardize both splits, filtering the training batch first so the
    // test set is scaled with training-set statistics
    Standardize filter = new Standardize();
    filter.setInputFormat(train);
    Instances newTrain = Filter.useFilter(train, filter);
    Instances newTest = Filter.useFilter(test, filter);

    Classifier nNet = new NeuralNet();
    nNet.buildClassifier(newTrain);

    Evaluation eval = new Evaluation(newTrain);
    eval.evaluateModel(nNet, newTest);
    System.out.println(eval.toSummaryString("\nResults\n-------------\n", false));
}
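As in the earlier examples, the Evaluation object is constructed from the training split: Weka uses that data only to estimate class priors for the reference measures (such as the relative error figures in the summary), while the predictions actually being scored come from evaluateModel on the held-out test set.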