List of usage examples for weka.core Instances classIndex
public int classIndex()
From source file:meka.classifiers.multilabel.Evaluation.java
License:Open Source License
/** * EvaluateModel - Build model 'h' on 'D_train', test it on 'D_test'. * Note that raw multi-label predictions returned in Result may not have been thresholded yet. * However, data statistics, classifier info, and running times are inpregnated into the Result here. * @param h a multi-dim. classifier * @param D_train training data//from w w w . j av a2s . c om * @param D_test test data * @return raw prediction data (no evaluation yet) */ public static Result evaluateModel(MultiLabelClassifier h, Instances D_train, Instances D_test) throws Exception { long before = System.currentTimeMillis(); // Set test data as unlabelled data, if SemisupervisedClassifier if (h instanceof SemisupervisedClassifier) { ((SemisupervisedClassifier) h).introduceUnlabelledData(MLUtils.setLabelsMissing(new Instances(D_test))); } // Train h.buildClassifier(D_train); long after = System.currentTimeMillis(); //System.out.println(":- Classifier -: "+h.getClass().getName()+": "+Arrays.toString(h.getOptions())); // Test long before_test = System.currentTimeMillis(); Result result = testClassifier(h, D_test); long after_test = System.currentTimeMillis(); result.setValue("Number of training instances", D_train.numInstances()); result.setValue("Number of test instances", D_test.numInstances()); result.setValue("Label cardinality (train set)", MLUtils.labelCardinality(D_train)); result.setValue("Label cardinality (test set)", MLUtils.labelCardinality(D_test)); result.setValue("Build Time", (after - before) / 1000.0); result.setValue("Test Time", (after_test - before_test) / 1000.0); result.setValue("Total Time", (after_test - before) / 1000.0); result.setInfo("Classifier", h.getClass().getName()); result.setInfo("Options", Arrays.toString(h.getOptions())); result.setInfo("Additional Info", h.toString()); result.setInfo("Dataset", MLUtils.getDatasetName(D_train)); result.setInfo("Number of labels (L)", String.valueOf(D_train.classIndex())); 
//result.setInfo("Maxfreq_set",MLUtils.mostCommonCombination(D_train,result.L)); String model = h.getModel(); if (model.length() > 0) result.setModel("Model", h.getModel()); return result; }
From source file:meka.classifiers.multilabel.Evaluation.java
License:Open Source License
/** * TestClassifier - test classifier h on D_test * @param h a multi-dim. classifier, ALREADY BUILT * @param D_test test data/* ww w .ja v a 2 s . c om*/ * @return Result with raw prediction data ONLY */ public static Result testClassifier(MultiLabelClassifier h, Instances D_test) throws Exception { int L = D_test.classIndex(); Result result = new Result(D_test.numInstances(), L); if (h.getDebug()) System.out.print(":- Evaluate "); for (int i = 0, c = 0; i < D_test.numInstances(); i++) { if (h.getDebug()) { int t = i * 50 / D_test.numInstances(); if (t > c) { System.out.print("#"); c = t; } } // No cheating allowed; clear all class information AbstractInstance x = (AbstractInstance) ((AbstractInstance) D_test.instance(i)).copy(); for (int v = 0; v < D_test.classIndex(); v++) x.setValue(v, 0.0); // Get and store ranking double y[] = h.distributionForInstance(x); // Cut off any [no-longer-needed] probabalistic information from MT classifiers. if (h instanceof MultiTargetClassifier) y = Arrays.copyOf(y, L); // Store the result result.addResult(y, D_test.instance(i)); } if (h.getDebug()) System.out.println(":-"); /* if(h.getDebug()) { for(int i = 0; i < result.size(); i++) { System.out.println("\t"+Arrays.toString(result.rowTrue(i))+" vs "+Arrays.toString(result.rowRanking(i))); } } */ return result; }
From source file:meka.classifiers.multilabel.Evaluation.java
License:Open Source License
/** *Test Classifier but threaded (Multiple) * @param h a multi-dim. classifier, ALREADY BUILT (threaded, implements MultiLabelThreaded) * @param D_test test data/*from w w w . java2 s . c om*/ * @return Result with raw prediction data ONLY */ public static Result testClassifierM(MultiLabelClassifier h, Instances D_test) throws Exception { int L = D_test.classIndex(); Result result = new Result(D_test.numInstances(), L); if (h.getDebug()) System.out.print(":- Evaluate "); if (h instanceof MultiLabelClassifierThreaded) { ((MultiLabelClassifierThreaded) h).setThreaded(true); double y[][] = ((MultiLabelClassifierThreaded) h).distributionForInstanceM(D_test); for (int i = 0, c = 0; i < D_test.numInstances(); i++) { // Store the result result.addResult(y[i], D_test.instance(i)); } if (h.getDebug()) System.out.println(":-"); /* if(h.getDebug()) { for(int i = 0; i < result.size(); i++) { System.out.println("\t"+Arrays.toString(result.rowActual(i))+" vs "+Arrays.toString(result.rowRanking(i))); } } */ } return result; }
From source file:meka.classifiers.multilabel.HASEL.java
License:Open Source License
@Override public void buildClassifier(Instances D) throws Exception { int L = D.classIndex(); int N = D.numInstances(); // Get partition from dataset hierarchy kMap = SuperLabelUtils.getPartitionFromDatasetHierarchy(D); m_M = kMap.length;/* www . ja va2 s .com*/ m_Classifiers = AbstractClassifier.makeCopies(m_Classifier, m_M); m_InstancesTemplates = new Instances[m_M]; for (int i = 0; i < m_M; i++) { if (getDebug()) System.out.println("Building model " + (i + 1) + "/" + m_M + ": " + Arrays.toString(kMap[i])); Instances D_i = SuperLabelUtils.makePartitionDataset(D, kMap[i]); m_Classifiers[i].buildClassifier(D_i); m_InstancesTemplates[i] = new Instances(D_i, 0); } }
From source file:meka.classifiers.multilabel.incremental.CCUpdateable.java
License:Open Source License
/**
 * Builds the updateable classifier chain. If no chain order was supplied,
 * a random permutation of the label indices (seeded with m_S) is used.
 *
 * @param D the training data
 * @throws Exception if the chain cannot be built
 */
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);

    final int L = D.classIndex();

    int chain[] = retrieveChain();
    if (chain == null) {
        // No chain specified: shuffle the identity permutation of labels.
        chain = MLUtils.gen_indices(L);
        MLUtils.randomize(chain, new Random(m_S));
    }

    if (getDebug())
        System.out.print(":- Chain (");
    root = new ULink(chain, 0, D);
    if (getDebug())
        System.out.println(" ) -:");
}
From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java
License:Open Source License
/**
 * EvaluateModel - Build and evaluate.
 *
 * @param h       a multi-label Updateable classifier
 * @param options dataset options (classifier options should already be set)
 * @return The evaluation Result
 * @throws Exception if loading the data or evaluating fails
 */
public static Result evaluateModel(MultiLabelClassifier h, String options[]) throws Exception {

    // Load Instances, ...
    Instances D = Evaluation.loadDataset(options);
    MLUtils.prepareData(D);

    // Set the number of windows (batches) @todo move below combining options?
    int nWin = OptionUtils.parse(options, 'x', 10);

    // Set the size of the initial training.
    // NOTE(review): nInit is parsed but never used below. The parse still consumes
    // the "split-percentage" option (so checkForRemainingOptions passes); removing
    // the line would change option handling -- confirm intent before cleaning up.
    int nInit = OptionUtils.parse(options, "split-percentage", 10);

    // Partially labelled ?
    double rLabeled = OptionUtils.parse(options, "supervision", 1.);

    // Get Threshold
    String Top = OptionUtils.parse(options, "threshold", "0.5");

    // Get Verbosity (do we want to see everything?)
    String Vop = OptionUtils.parse(options, "verbosity", "3");

    if (h.getDebug())
        System.out.println(":- Dataset -: " + MLUtils.getDatasetName(D) + "\tL=" + D.classIndex() + "");

    // Fail on any options not consumed above.
    Utils.checkForRemainingOptions(options);

    return evaluateModelPrequentialBasic(h, D, nWin, rLabeled, Top, Vop);
}
From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java
License:Open Source License
/**
 * EvaluateModelBatchWindow - Evaluate a multi-label data-stream model over windows.
 *
 * @param h          Multilabel Classifier
 * @param D          stream
 * @param numWindows number of windows
 * @param rLabeled   labelled-ness (1.0 by default)
 * @param Top        threshold option
 * @param Vop        verbosity option
 * @return The Result on the final window (but it contains samples of all the other evaluated windows).
 *         The window is sampled every N/numWindows instances, for a total of numWindows windows.
 */
public static Result evaluateModelBatchWindow(MultiLabelClassifier h, Instances D, int numWindows,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out
                .println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    // NOTE(review): N is computed but never referenced below (D.numInstances() is re-read).
    int N = D.numInstances();
    // In MEKA data the class index stores the number of labels (L).
    int L = D.classIndex();

    // the Result to use
    Result result = null;

    // the samples of all windows
    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();

    long train_time = 0;
    long test_time = 0;

    int windowSize = (int) Math.floor(D.numInstances() / (double) numWindows);

    if (rLabeled * windowSize < 1.)
        throw new Exception("[Error] The ratio of labelled instances (" + rLabeled
                + ") is too small given the window size!");

    // NOTE(review): 'nth' is computed but never used below; the labelled/unlabelled
    // split is carried out by the modulo tests inside the instance loop instead.
    double nth = 1. / rLabeled; // label every nth example

    Instances D_init = new Instances(D, 0, windowSize); // initial window

    if (h.getDebug()) {
        System.out.println("Training classifier on initial window ...");
    }
    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;
    if (h.getDebug()) {
        System.out.println("Done (in " + (train_time / 1000.0) + " s)");
    }

    // The rest of the stream (everything after the initial window).
    D = new Instances(D, windowSize, D.numInstances() - windowSize);

    // Per-label thresholds, initialised to 0.5, re-calibrated after each window.
    double t[] = new double[L];
    Arrays.fill(t, 0.5);

    // NOTE(review): V is parsed but never used below (Vop is passed on directly).
    int V = MLUtils.getIntegerOption(Vop, 3);

    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
        System.out.print("#" + Utils.padLeft("w", 6) + " " + Utils.padLeft("n", 6));
        for (String m : measures) {
            System.out.print(" ");
            System.out.print(Utils.padLeft(m, 12));
        }
        System.out.println("");
        System.out.println("--------------------------------------------------------------------------------");
    }

    // Global instance counter: deliberately shared across windows so each window
    // resumes where the previous one stopped.
    int i = 0;
    for (int w = 0; w < numWindows - 1; w++) { // For each evaluation window ...

        result = new Result(L);
        result.setInfo("Supervision", String.valueOf(rLabeled));
        result.setInfo("Type", "MLi");

        int n = 0; // number of labelled (i.e. tested) instances in this window
        test_time = 0;
        train_time = 0;
        // NOTE(review): loop variable 'c' is declared but never used.
        for (int c = 0; i < (w * windowSize) + windowSize; i++) { // For each instance in the evaluation window ...

            Instance x = D.instance(i);
            AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy

            // (we can't clear the class values because certain classifiers need to know how well they're doing -- just trust that there's no cheating!)
            //for(int j = 0; j < L; j++)
            //	x_.setValue(j,0.0);

            if (rLabeled < 0.5 && (i % (int) (1 / rLabeled) == 0)
                    || (rLabeled >= 0.5 && (i % (int) (1. / (1. - rLabeled)) != 0))) {
                // LABELLED - Test & record prediction
                long before_test = System.currentTimeMillis();
                double y[] = h.distributionForInstance(x_);
                long after_test = System.currentTimeMillis();
                test_time += (after_test - before_test); // was +=
                result.addResult(y, x);
                n++;
            } else {
                // UNLABELLED
                x = MLUtils.setLabelsMissing(x, L);
            }

            // Update the classifier. (The classifier will have to decide if it wants to deal with unlabelled instances.)
            long before = System.currentTimeMillis();
            ((UpdateableClassifier) h).updateClassifier(x);
            long after = System.currentTimeMillis();
            train_time += (after - before); // was +=
        }

        // calculate results
        result.setInfo("Threshold", Arrays.toString(t));
        result.output = Result.getStats(result, Vop);
        result.setMeasurement("Test time", (test_time) / 1000.0);
        result.setMeasurement("Build time", (train_time) / 1000.0);
        result.setMeasurement("Total time", (test_time + train_time) / 1000.0);
        result.setMeasurement("Threshold", (double) t[0]);
        result.setMeasurement("Instances", (double) i);
        result.setMeasurement("Samples", (double) (samples.size() + 1));
        samples.add(result.output);

        // Display results (to CLI)
        if (h.getDebug()) {
            System.out.print("#" + Utils.doubleToString((double) w + 1, 6, 0) + " "
                    + Utils.doubleToString((double) n, 6, 0));
            n = 0;
            for (String m : measures) {
                System.out.print(" ");
                System.out.print(Utils.doubleToString((Double) result.getMeasurement(m), 12, 4));
            }
            System.out.println("");
        }

        // Calibrate threshold for next window
        if (Top.equals("PCutL")) {
            // PCutL: one calibrated threshold per label.
            t = ThresholdUtils.calibrateThresholds(result.predictions,
                    MLUtils.labelCardinalities(result.actuals));
        } else {
            // Otherwise: a single calibrated threshold shared by all labels.
            Arrays.fill(t, ThresholdUtils.calibrateThreshold(result.predictions,
                    MLUtils.labelCardinality(result.allTrueValues())));
        }
    }

    if (h.getDebug()) {
        System.out.println("--------------------------------------------------------------------------------");
    }

    // This is the last Result; prepare it for evaluation output.
    result.setInfo("Classifier", h.getClass().getName());
    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);
    result.vals.put("Total instances tested", (double) i);
    result.vals.put("Initial instances for training", (double) windowSize);
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));
    return result;
}
From source file:meka.classifiers.multilabel.incremental.IncrementalEvaluation.java
License:Open Source License
/**
 * Prequential Evaluation - Accuracy since the start of evaluation.
 *
 * @param h          Multilabel Classifier
 * @param D          stream
 * @param windowSize sampling frequency (of evaluation statistics)
 * @param rLabeled   labelled-ness (1.0 by default)
 * @param Top        threshold option
 * @param Vop        verbosity option
 * @return the Result, with measurements sampled every windowSize instances
 */
public static Result evaluateModelPrequentialBasic(MultiLabelClassifier h, Instances D, int windowSize,
        double rLabeled, String Top, String Vop) throws Exception {

    if (h.getDebug())
        System.out
                .println(":- Classifier -: " + h.getClass().getName() + ": " + Arrays.toString(h.getOptions()));

    // In MEKA data the class index stores the number of labels (L).
    int L = D.classIndex();
    Result result = new Result();
    long train_time = 0;
    long test_time = 0;

    // NOTE(review): 'nth' is computed but never used below.
    double nth = 1. / rLabeled; // label every nth example

    result.setInfo("Supervision", String.valueOf(rLabeled));

    Instances D_init = new Instances(D, 0, windowSize); // initial window
    if (h.getDebug()) {
        System.out.println("Training classifier on initial window (of size " + windowSize + ") ...");
    }
    train_time = System.currentTimeMillis();
    h.buildClassifier(D_init); // initial classifier
    train_time = System.currentTimeMillis() - train_time;

    // The rest of the stream (after the initial window).
    D = new Instances(D, windowSize, D.numInstances() - windowSize);
    if (h.getDebug()) {
        System.out.println(
                "Proceeding to Test/Label/Update cycle on remaining (" + D.numInstances() + ") instances ...");
    }

    result.setInfo("Classifier", h.getClass().getName());
    result.setInfo("Options", Arrays.toString(h.getOptions()));
    result.setInfo("Additional Info", h.toString());
    result.setInfo("Dataset", MLUtils.getDatasetName(D));
    result.setInfo("Verbosity", Vop);
    if (h instanceof MultiTargetClassifier || Evaluation.isMT(D)) {
        result.setInfo("Type", "MT");
    } else {
        result.setInfo("Type", "ML");
        // Multi-label evaluation supports only one global threshold here;
        // fall back to 0.5 if Top is not parseable as a double (e.g. "PCut1").
        double t = 0.5;
        try {
            t = Double.parseDouble(Top);
        } catch (Exception e) {
            System.err.println(
                    "[WARNING] Only a single threshold can be chosen for this kind of evaluation; Using " + t);
        }
        result.setInfo("Threshold", String.valueOf(t));
    }

    ArrayList<HashMap<String, Object>> samples = new ArrayList<HashMap<String, Object>>();
    for (int i = 0; i < D.numInstances(); i++) {

        Instance x = D.instance(i);
        AbstractInstance x_ = (AbstractInstance) ((AbstractInstance) x).copy(); // copy

        /*
         * TEST
         */
        long before_test = System.currentTimeMillis();
        double y[] = h.distributionForInstance(x_);
        long after_test = System.currentTimeMillis();
        test_time += (after_test - before_test);
        result.addResult(y, x);

        /*
         * LABEL BECOMES AVAILABLE ?
         * NOTE(review): this condition looks suspicious -- with the default
         * rLabeled = 1.0 (fully supervised) every instance has its labels set
         * missing before the update, so the classifier would never see labels.
         * Compare with the modulo-based test in evaluateModelBatchWindow;
         * possibly the condition is inverted. Confirm against upstream MEKA.
         */
        if (rLabeled >= 0.5) {
            x = MLUtils.setLabelsMissing(x, L);
        }

        /*
         * UPDATE
         * (The classifier will have to decide if it wants to deal with unlabelled instances.)
         */
        long before = System.currentTimeMillis();
        ((UpdateableClassifier) h).updateClassifier(x);
        long after = System.currentTimeMillis();
        train_time += (after - before);

        /*
         * RECORD MEASUREMENT (once per window of windowSize instances)
         */
        if (i % windowSize == (windowSize - 1)) {
            HashMap<String, Object> eval_sample = Result.getStats(result, Vop);
            eval_sample.put("Test time", (test_time) / 1000.0);
            eval_sample.put("Build time", (train_time) / 1000.0);
            eval_sample.put("Total time", (test_time + train_time) / 1000.0);
            eval_sample.put("Instances", (double) i);
            eval_sample.put("Samples", (double) (samples.size() + 1));
            samples.add(eval_sample);
            System.out.println("Sample (#" + samples.size() + ") of performance at " + i + "/"
                    + D.numInstances() + " instances.");
        }
    }

    result.output = Result.getStats(result, Vop);
    result.setMeasurement("Results sampled over time", Result.getResultsAsInstances(samples));
    result.vals.put("Test time", (test_time) / 1000.0);
    result.vals.put("Build time", (train_time) / 1000.0);
    result.vals.put("Total time", (test_time + train_time) / 1000.0);
    return result;
}
From source file:meka.classifiers.multilabel.incremental.PSUpdateable.java
License:Open Source License
@Override public void buildClassifier(Instances D) throws Exception { testCapabilities(D);/* www . j av a2s .c o m*/ L = D.classIndex(); batch = new Instances(D); if (batch.numInstances() >= getLimit()) { // if we have at least the limit, build! if (getDebug()) System.out.println("Train on instances 0 ... " + batch.numInstances()); combinations = PSUtils.countCombinationsSparse(batch, L); MLUtils.pruneCountHashMap(combinations, m_P); // { NEW (we don't want more than m_Support classes!) int p = m_P; while (combinations.size() > getSupport()) { //System.out.println("double prune!"); m_P++; MLUtils.pruneCountHashMap(combinations, m_P); } super.buildClassifier(batch); m_P = p; // } NEW mlu = null; // We won't be needing the majority set classifier! } else { // otherwise we don't have enough yet, initialize the collection batch if (getDebug()) System.out.println("Continue collection batch from instance " + batch.numInstances()); // we will predict the majority labelset until we have a large enough batch mlu.buildClassifier(batch); } }
From source file:meka.classifiers.multilabel.LabelTransformationClassifier.java
License:Open Source License
/** * Returns a new set of instances either only with the labels (labels = true) or * only the features (labels = false)/*from w ww. java 2s .com*/ * * @param inst The input instances. * @param labels Return labels (true) or features (false) */ protected Instances extractPart(Instances inst, boolean labels) throws Exception { //TODO Maybe alreade exists somewhere in Meka? Remove remove = new Remove(); remove.setAttributeIndices("first-" + (inst.classIndex())); remove.setInvertSelection(labels); remove.setInputFormat(inst); return Filter.useFilter(inst, remove); }