List of usage examples for the weka.core.Utils.forName method.
public static Object forName(Class<?> classType, String className, String[] options) throws Exception
From source file:ExperimentDemo.java
License:Open Source License
/** * Expects the following parameters: // w w w .j a v a 2 s . c o m * <ul> * <li>-classifier "classifier incl. parameters"</li> * <li>-exptype "classification|regression"</li> * <li>-splittype "crossvalidation|randomsplit"</li> * <li>-runs "# of runs"</li> * <li>-folds "# of cross-validation folds"</li> * <li>-percentage "percentage for randomsplit"</li> * <li>-result "arff file for storing the results"</li> * <li>-t "dataset" (can be supplied multiple times)</li> * </ul> * * @param args the commandline arguments * @throws Exception if something goes wrong */ public static void main(String[] args) throws Exception { // parameters provided? if (args.length == 0) { System.out.println("\nUsage: weka.examples.experiment.ExperimentDemo\n" + "\t -classifier <classifier incl. parameters>\n" + "\t -exptype <classification|regression>\n" + "\t -splittype <crossvalidation|randomsplit>\n" + "\t -runs <# of runs>\n" + "\t -folds <folds for CV>\n" + "\t -percentage <percentage for randomsplit>\n" + "\t -result <ARFF file for storing the results>\n" + "\t -t dataset (can be supplied multiple times)\n"); System.exit(1); } // 1. setup the experiment System.out.println("Setting up..."); Experiment exp = new Experiment(); exp.setPropertyArray(new Classifier[0]); exp.setUsePropertyIterator(true); String option; // classification or regression option = Utils.getOption("exptype", args); if (option.length() == 0) throw new IllegalArgumentException("No experiment type provided!"); SplitEvaluator se = null; /* * Interface to objects able to generate a fixed set of results for a particular split of a dataset. * The set of results should contain fields related to any settings of the SplitEvaluator (not including the dataset name. * For example, one field for the classifier used to get the results, another for the classifier options, etc). * Possible implementations of SplitEvaluator: StdClassification results, StdRegression results. 
*/ Classifier sec = null; boolean classification = false; if (option.equals("classification")) { classification = true; se = new ClassifierSplitEvaluator(); /* * A SplitEvaluator that produces results for a classification scheme on a nominal class attribute. */ sec = ((ClassifierSplitEvaluator) se).getClassifier(); } else if (option.equals("regression")) { se = new RegressionSplitEvaluator(); sec = ((RegressionSplitEvaluator) se).getClassifier(); } else { throw new IllegalArgumentException("Unknown experiment type '" + option + "'!"); } // crossvalidation or randomsplit option = Utils.getOption("splittype", args); if (option.length() == 0) throw new IllegalArgumentException("No split type provided!"); if (option.equals("crossvalidation")) { CrossValidationResultProducer cvrp = new CrossValidationResultProducer(); /* * Generates for each run, carries out an n-fold cross-validation, using the set SplitEvaluator to generate some results. * If the class attribute is nominal, the dataset is stratified. Results for each fold are generated, so you may wish to use * this in addition with an AveragingResultProducer to obtain averages for each run. */ option = Utils.getOption("folds", args); if (option.length() == 0) throw new IllegalArgumentException("No folds provided!"); cvrp.setNumFolds(Integer.parseInt(option)); cvrp.setSplitEvaluator(se); PropertyNode[] propertyPath = new PropertyNode[2]; /* * Stores information on a property of an object: the class of the object with the property; * the property descriptor, and the current value. 
*/ try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class), CrossValidationResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(cvrp); exp.setPropertyPath(propertyPath); } else if (option.equals("randomsplit")) { RandomSplitResultProducer rsrp = new RandomSplitResultProducer(); rsrp.setRandomizeData(true); option = Utils.getOption("percentage", args); if (option.length() == 0) throw new IllegalArgumentException("No percentage provided!"); rsrp.setTrainPercent(Double.parseDouble(option)); rsrp.setSplitEvaluator(se); PropertyNode[] propertyPath = new PropertyNode[2]; try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class), RandomSplitResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(rsrp); exp.setPropertyPath(propertyPath); } else { throw new IllegalArgumentException("Unknown split type '" + option + "'!"); } // runs option = Utils.getOption("runs", args); if (option.length() == 0) throw new IllegalArgumentException("No runs provided!"); exp.setRunLower(1); exp.setRunUpper(Integer.parseInt(option)); // classifier option = Utils.getOption("classifier", args); if (option.length() == 0) throw new IllegalArgumentException("No classifier provided!"); String[] options = Utils.splitOptions(option); String classname = options[0]; options[0] = ""; Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options); exp.setPropertyArray(new Classifier[] { c }); // datasets boolean data = false; DefaultListModel model = new DefaultListModel(); do { option = Utils.getOption("t", args); if (option.length() > 0) { File 
file = new File(option); if (!file.exists()) throw new IllegalArgumentException("File '" + option + "' does not exist!"); data = true; model.addElement(file); } } while (option.length() > 0); if (!data) throw new IllegalArgumentException("No data files provided!"); exp.setDatasets(model); // result option = Utils.getOption("result", args); if (option.length() == 0) throw new IllegalArgumentException("No result file provided!"); InstancesResultListener irl = new InstancesResultListener(); irl.setOutputFile(new File(option)); exp.setResultListener(irl); // 2. run experiment System.out.println("Initializing..."); exp.initialize(); System.out.println("Running..."); exp.runExperiment(); System.out.println("Finishing..."); exp.postProcess(); // 3. calculate statistics and output them System.out.println("Evaluating..."); PairedTTester tester = new PairedCorrectedTTester(); /* * Calculates T-Test statistics on data stored in a set of instances. */ Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile()))); tester.setInstances(result); tester.setSortColumn(-1); tester.setRunColumn(result.attribute("Key_Run").index()); if (classification) tester.setFoldColumn(result.attribute("Key_Fold").index()); tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1))); tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + "," + (result.attribute("Key_Scheme_options").index() + 1) + "," + (result.attribute("Key_Scheme_version_ID").index() + 1))); tester.setResultMatrix(new ResultMatrixPlainText()); tester.setDisplayedResultsets(null); tester.setSignificanceLevel(0.05); tester.setShowStdDevs(true); // fill result matrix (but discarding the output) if (classification) tester.multiResultsetFull(0, result.attribute("Percent_correct").index()); else tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index()); // output results for reach dataset 
System.out.println("\nResult:"); ResultMatrix matrix = tester.getResultMatrix(); for (int i = 0; i < matrix.getColCount(); i++) { System.out.println(matrix.getColName(i)); System.out.println(" Perc. correct: " + matrix.getMean(i, 0)); System.out.println(" StdDev: " + matrix.getStdDev(i, 0)); } }
From source file:CrossValidationMultipleRuns.java
License:Open Source License
/** * Performs the cross-validation. See Javadoc of class for information * on command-line parameters./* w w w .j a v a 2s . co m*/ * * @param args the command-line parameters * @throws Exception if something goes wrong */ public static void main(String[] args) throws Exception { // loads data and set class index Instances data = DataSource.read(Utils.getOption("t", args)); String clsIndex = Utils.getOption("c", args); if (clsIndex.length() == 0) clsIndex = "last"; if (clsIndex.equals("first")) data.setClassIndex(0); else if (clsIndex.equals("last")) data.setClassIndex(data.numAttributes() - 1); else data.setClassIndex(Integer.parseInt(clsIndex) - 1); // classifier String[] tmpOptions; String classname; tmpOptions = Utils.splitOptions(Utils.getOption("W", args)); classname = tmpOptions[0]; tmpOptions[0] = ""; Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions); // other options int runs = Integer.parseInt(Utils.getOption("r", args)); int folds = Integer.parseInt(Utils.getOption("x", args)); // perform cross-validation for (int i = 0; i < runs; i++) { // randomize data int seed = i + 1; Random rand = new Random(seed); Instances randData = new Instances(data); randData.randomize(rand); //if (randData.classAttribute().isNominal()) // randData.stratify(folds); Evaluation eval = new Evaluation(randData); StringBuilder optionsString = new StringBuilder(); for (String s : cls.getOptions()) { optionsString.append(s); optionsString.append(" "); } // output evaluation System.out.println(); System.out.println("=== Setup run " + (i + 1) + " ==="); System.out.println("Classifier: " + optionsString.toString()); System.out.println("Dataset: " + data.relationName()); System.out.println("Folds: " + folds); System.out.println("Seed: " + seed); System.out.println(); for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); // build and evaluate classifier Classifier clsCopy = 
Classifier.makeCopy(cls); clsCopy.buildClassifier(train); eval.evaluateModel(clsCopy, test); System.out.println(eval.toClassDetailsString()); } System.out.println( eval.toSummaryString("=== " + folds + "-fold Cross-validation run " + (i + 1) + " ===", false)); } }
From source file:MPCKMeans.java
License:Open Source License
/** * Parses a given list of options./* w w w .j a va 2 s. c o m*/ * @param options the list of options as an array of strings * @exception Exception if an option is not supported * **/ public void setOptions(String[] options) throws Exception { if (Utils.getFlag('X', options)) { System.out.println("Setting seedable to: false"); setSeedable(false); } String optionString = Utils.getOption('T', options); if (optionString.length() != 0) { setTrainable(new SelectedTag(Integer.parseInt(optionString), TAGS_TRAINING)); System.out.println("Setting trainable to: " + Integer.parseInt(optionString)); } optionString = Utils.getOption('M', options); if (optionString.length() != 0) { String[] metricSpec = Utils.splitOptions(optionString); String metricName = metricSpec[0]; metricSpec[0] = ""; setMetric((LearnableMetric) Utils.forName(LearnableMetric.class, metricName, metricSpec)); System.out.println("Setting metric to: " + metricName); } optionString = Utils.getOption('L', options); if (optionString.length() != 0) { String[] learnerSpec = Utils.splitOptions(optionString); String learnerName = learnerSpec[0]; learnerSpec[0] = ""; setMetricLearner( (MPCKMeansMetricLearner) Utils.forName(MPCKMeansMetricLearner.class, learnerName, learnerSpec)); System.out.println("Setting metricLearner to: " + m_metricLearner); } optionString = Utils.getOption('G', options); if (optionString.length() != 0) { String[] regularizerSpec = Utils.splitOptions(optionString); String regularizerName = regularizerSpec[0]; regularizerSpec[0] = ""; m_metric.setRegularizer( (Regularizer) Utils.forName(Regularizer.class, regularizerName, regularizerSpec)); System.out.println("Setting regularizer to: " + regularizerName); } optionString = Utils.getOption('A', options); if (optionString.length() != 0) { String[] assignerSpec = Utils.splitOptions(optionString); String assignerName = assignerSpec[0]; assignerSpec[0] = ""; setAssigner((MPCKMeansAssigner) Utils.forName(MPCKMeansAssigner.class, assignerName, 
assignerSpec)); System.out.println("Setting assigner to: " + assignerName); } optionString = Utils.getOption('I', options); if (optionString.length() != 0) { String[] initializerSpec = Utils.splitOptions(optionString); String initializerName = initializerSpec[0]; initializerSpec[0] = ""; setInitializer((MPCKMeansInitializer) Utils.forName(MPCKMeansInitializer.class, initializerName, initializerSpec)); System.out.println("Setting initializer to: " + initializerName); } if (Utils.getFlag('U', options)) { setUseMultipleMetrics(true); System.out.println("Setting multiple metrics to: true"); } optionString = Utils.getOption('N', options); if (optionString.length() != 0) { setNumClusters(Integer.parseInt(optionString)); System.out.println("Setting numClusters to: " + m_NumClusters); } optionString = Utils.getOption('R', options); if (optionString.length() != 0) { setRandomSeed(Integer.parseInt(optionString)); System.out.println("Setting randomSeed to: " + m_RandomSeed); } optionString = Utils.getOption('l', options); if (optionString.length() != 0) { setLogTermWeight(Double.parseDouble(optionString)); System.out.println("Setting logTermWeight to: " + m_logTermWeight); } optionString = Utils.getOption('r', options); if (optionString.length() != 0) { setRegularizerTermWeight(Double.parseDouble(optionString)); System.out.println("Setting regularizerTermWeight to: " + m_regularizerTermWeight); } optionString = Utils.getOption('m', options); if (optionString.length() != 0) { setMustLinkWeight(Double.parseDouble(optionString)); System.out.println("Setting mustLinkWeight to: " + m_MLweight); } optionString = Utils.getOption('c', options); if (optionString.length() != 0) { setCannotLinkWeight(Double.parseDouble(optionString)); System.out.println("Setting cannotLinkWeight to: " + m_CLweight); } optionString = Utils.getOption('i', options); if (optionString.length() != 0) { setMaxIterations(Integer.parseInt(optionString)); System.out.println("Setting maxIterations to: " + 
m_maxIterations); } optionString = Utils.getOption('B', options); if (optionString.length() != 0) { setMaxBlankIterations(Integer.parseInt(optionString)); System.out.println("Setting maxBlankIterations to: " + m_maxBlankIterations); } optionString = Utils.getOption('O', options); if (optionString.length() != 0) { setClusterAssignmentsOutputFile(optionString); System.out.println("Setting clusterAssignmentsOutputFile to: " + m_ClusterAssignmentsOutputFile); } optionString = Utils.getOption('H', options); if (optionString.length() != 0) { setConstraintIncoherenceFile(optionString); System.out.println("Setting m_ConstraintIncoherenceFile to: " + m_ConstraintIncoherenceFile); } if (Utils.getFlag('V', options)) { setUseTransitiveConstraints(false); System.out.println("Setting useTransitiveConstraints to: false"); } }
From source file:HierarchicalClusterer.java
License:Open Source License
/** * Parses a given list of options. <p/> * <!-- options-start -->/*from w w w . ja v a 2s .co m*/ * Valid options are: <p/> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { m_bPrintNewick = Utils.getFlag('P', options); String optionString = Utils.getOption('N', options); if (optionString.length() != 0) { Integer temp = new Integer(optionString); setNumClusters(temp); } else { setNumClusters(2); } setDebug(Utils.getFlag('D', options)); setDistanceIsBranchLength(Utils.getFlag('B', options)); String sLinkType = Utils.getOption('L', options); if (sLinkType.compareTo("SINGLE") == 0) { setLinkType(new SelectedTag(SINGLE, TAGS_LINK_TYPE)); } if (sLinkType.compareTo("COMPLETE") == 0) { setLinkType(new SelectedTag(COMPLETE, TAGS_LINK_TYPE)); } if (sLinkType.compareTo("AVERAGE") == 0) { setLinkType(new SelectedTag(AVERAGE, TAGS_LINK_TYPE)); } if (sLinkType.compareTo("MEAN") == 0) { setLinkType(new SelectedTag(MEAN, TAGS_LINK_TYPE)); } if (sLinkType.compareTo("CENTROID") == 0) { setLinkType(new SelectedTag(CENTROID, TAGS_LINK_TYPE)); } if (sLinkType.compareTo("WARD") == 0) { setLinkType(new SelectedTag(WARD, TAGS_LINK_TYPE)); } if (sLinkType.compareTo("ADJCOMLPETE") == 0) { setLinkType(new SelectedTag(ADJCOMLPETE, TAGS_LINK_TYPE)); } if (sLinkType.compareTo("NEIGHBOR_JOINING") == 0) { setLinkType(new SelectedTag(NEIGHBOR_JOINING, TAGS_LINK_TYPE)); } String nnSearchClass = Utils.getOption('A', options); if (nnSearchClass.length() != 0) { String nnSearchClassSpec[] = Utils.splitOptions(nnSearchClass); if (nnSearchClassSpec.length == 0) { throw new Exception("Invalid DistanceFunction specification string."); } String className = nnSearchClassSpec[0]; nnSearchClassSpec[0] = ""; setDistanceFunction( (DistanceFunction) Utils.forName(DistanceFunction.class, className, nnSearchClassSpec)); } else { setDistanceFunction(new 
EuclideanDistance()); } Utils.checkForRemainingOptions(options); }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
/** * Parses a given list of options. <p/> * //from w ww .j av a 2 s . co m <!-- options-start --> * Valid options are: <p/> * * <pre> -N <num> * number of clusters. (default = 2).</pre> * * <pre> -S <num> * Random number seed. * (default 1)</pre> * * <pre> * -A <classname and options> * Distance function to be used for instance comparison * (adaptedSemanticMeasurers.ChEBIDir) * </pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { String optionString = Utils.getOption('N', options); if (optionString.length() != 0) { setNumClusters(Integer.parseInt(optionString)); } String distFunctionClass = Utils.getOption('A', options); if (distFunctionClass.length() != 0) { String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass); if (distFunctionClassSpec.length == 0) { throw new Exception("Invalid DistanceFunction specification string."); } String className = distFunctionClassSpec[0]; distFunctionClassSpec[0] = ""; setDistanceFunction( (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec)); } else { setDistanceFunction(new ChEBIDir()); } super.setOptions(options); }
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/** * Parses a given list of options./*from w w w .j a v a 2s. com*/ * <p/> * * <!-- options-start --> Valid options are: * <p/> * * <pre> * -N <num> * number of clusters. * (default 2). * </pre> * * <pre> * -V * Display std. deviations for centroids. * </pre> * * <pre> * -M * Replace missing values with mean/mode. * </pre> * * <pre> * -S <num> * Random number seed. * (default 10) * </pre> * * <pre> * -A <classname and options> * Distance function to be used for instance comparison * (adaptedSemanticMeasurers.ChEBIDir) * </pre> * * <pre> * -I <num> * Maximum number of iterations. * </pre> * * <pre> * -O * Preserve order of instances. * </pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { m_displayStdDevs = Utils.getFlag("V", options); m_dontReplaceMissing = Utils.getFlag("M", options); String optionString = Utils.getOption('N', options); if (optionString.length() != 0) { setNumClusters(Integer.parseInt(optionString)); } optionString = Utils.getOption("I", options); if (optionString.length() != 0) { setMaxIterations(Integer.parseInt(optionString)); } String distFunctionClass = Utils.getOption('A', options); if (distFunctionClass.length() != 0) { String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass); if (distFunctionClassSpec.length == 0) { throw new Exception("Invalid DistanceFunction specification string."); } String className = distFunctionClassSpec[0]; distFunctionClassSpec[0] = ""; setDistanceFunction( (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec)); } else { setDistanceFunction(new ChEBIDir()); } m_PreserveOrder = Utils.getFlag("O", options); super.setOptions(options); }
From source file:asap.CrossValidation.java
/** * * @param dataInput/*from w ww . j a va 2 s .c o m*/ * @param classIndex * @param removeIndices * @param classifierCmd * @param seed * @param folds * @param modelOutputFile * @return * @throws Exception */ public static String performCrossValidation(String dataInput, String classIndex, String removeIndices, String[] classifierCmd, int seed, int folds, String modelOutputFile) throws Exception { // classifier String classname = classifierCmd[0]; classifierCmd[0] = ""; AbstractClassifier cls = (AbstractClassifier) Utils.forName(Classifier.class, classname, classifierCmd); return performCrossValidation(dataInput, classIndex, removeIndices, cls, seed, folds, modelOutputFile); }
From source file:asap.PostProcess.java
public void loadTrainingDataStream(PreProcessOutputStream pposTrainingData) { Instances instancesTrainingSet;//from w ww.j a v a2 s .c o m DataSource source = new DataSource(pposTrainingData); try { instancesTrainingSet = source.getDataSet(); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); return; } // setting class attribute if the data format does not provide this information if (instancesTrainingSet.classIndex() == -1) { instancesTrainingSet.setClass(instancesTrainingSet.attribute("gold_standard")); } for (String wekaModelsCmd : Config.getWekaModelsCmd()) { String[] classifierCmd; try { classifierCmd = Utils.splitOptions(wekaModelsCmd); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); continue; } String classname = classifierCmd[0]; classifierCmd[0] = ""; try { AbstractClassifier cl = (AbstractClassifier) Utils.forName(Classifier.class, classname, classifierCmd); // String modelName = String.format("%s%s%s%s.model", modelDirectory, File.separatorChar, i, classname); // System.out.println(String.format("\tBuilding model %s (%s) and doing cross-validation...", i++, modelName)); // System.out.println(CrossValidation.performCrossValidationMT(trainSet, cl, Config.getCrossValidationSeed(), Config.getCrossValidationFolds(), modelName)); systems.add(new NLPSystem(cl, instancesTrainingSet, null)); System.out.println("\tAdded system " + systems.get(systems.size() - 1).shortName()); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java
License:Open Source License
public void setOptions(String[] options) throws Exception { m_displayStdDevs = Utils.getFlag("V", options); m_dontReplaceMissing = Utils.getFlag("M", options); String optionString = Utils.getOption('N', options); if (optionString.length() != 0) { setNumClusters(Integer.parseInt(optionString)); }//from ww w .j a va2 s .c o m optionString = Utils.getOption("I", options); if (optionString.length() != 0) { setMaxIterations(Integer.parseInt(optionString)); } String distFunctionClass = Utils.getOption('A', options); if (distFunctionClass.length() != 0) { String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass); if (distFunctionClassSpec.length == 0) { throw new Exception("Invalid DistanceFunction specification string."); } String className = distFunctionClassSpec[0]; distFunctionClassSpec[0] = ""; setDistanceFunction( (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec)); } else { setDistanceFunction(new EuclideanDistance()); } m_PreserveOrder = Utils.getFlag("O", options); super.setOptions(options); }
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/** * Parses a given list of options.//from w w w. j av a2 s. co m * <p/> * * <!-- options-start --> Valid options are: * <p/> * * <pre> * -N <num> * Number of clusters. * (default 2). * </pre> * * <pre> * -init * Initialization method to use. * 0 = random, 1 = k-means++, 2 = canopy, 3 = farthest first. * (default = 0) * </pre> * * <pre> * -C * Use canopies to reduce the number of distance calculations. * </pre> * * <pre> * -max-candidates <num> * Maximum number of candidate canopies to retain in memory * at any one time when using canopy clustering. * T2 distance plus, data characteristics, * will determine how many candidate canopies are formed before * periodic and final pruning are performed, which might result * in exceess memory consumption. This setting avoids large numbers * of candidate canopies consuming memory. (default = 100) * </pre> * * <pre> * -periodic-pruning <num> * How often to prune low density canopies when using canopy clustering. * (default = every 10,000 training instances) * </pre> * * <pre> * -min-density * Minimum canopy density, when using canopy clustering, below which * a canopy will be pruned during periodic pruning. (default = 2 instances) * </pre> * * <pre> * -t2 * The T2 distance to use when using canopy clustering. Values < 0 indicate that * a heuristic based on attribute std. deviation should be used to set this. * (default = -1.0) * </pre> * * <pre> * -t1 * The T1 distance to use when using canopy clustering. A value < 0 is taken as a * positive multiplier for T2. (default = -1.5) * </pre> * * <pre> * -V * Display std. deviations for centroids. * </pre> * * <pre> * -M * Don't replace missing values with mean/mode. * </pre> * * <pre> * -A <classname and options> * Distance function to use. * (default: weka.core.EuclideanDistance) * </pre> * * <pre> * -I <num> * Maximum number of iterations. * </pre> * * <pre> * -O * Preserve order of instances. 
* </pre> * * <pre> * -fast * Enables faster distance calculations, using cut-off values. * Disables the calculation/output of squared errors/distances. * </pre> * * <pre> * -num-slots <num> * Number of execution slots. * (default 1 - i.e. no parallelism) * </pre> * * <pre> * -S <num> * Random number seed. * (default 10) * </pre> * * <pre> * -output-debug-info * If set, clusterer is run in debug mode and * may output additional info to the console * </pre> * * <pre> * -do-not-check-capabilities * If set, clusterer capabilities are not checked before clusterer is built * (use with caution). * </pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { m_displayStdDevs = Utils.getFlag("V", options); m_dontReplaceMissing = Utils.getFlag("M", options); String initM = Utils.getOption("init", options); if (initM.length() > 0) { setInitializationMethod(new SelectedTag(Integer.parseInt(initM), TAGS_SELECTION)); } m_speedUpDistanceCompWithCanopies = Utils.getFlag('C', options); String temp = Utils.getOption("max-candidates", options); if (temp.length() > 0) { setCanopyMaxNumCanopiesToHoldInMemory(Integer.parseInt(temp)); } temp = Utils.getOption("periodic-pruning", options); if (temp.length() > 0) { setCanopyPeriodicPruningRate(Integer.parseInt(temp)); } temp = Utils.getOption("min-density", options); if (temp.length() > 0) { setCanopyMinimumCanopyDensity(Double.parseDouble(temp)); } temp = Utils.getOption("t2", options); if (temp.length() > 0) { setCanopyT2(Double.parseDouble(temp)); } temp = Utils.getOption("t1", options); if (temp.length() > 0) { setCanopyT1(Double.parseDouble(temp)); } String optionString = Utils.getOption('N', options); if (optionString.length() != 0) { setNumClusters(Integer.parseInt(optionString)); } optionString = Utils.getOption("I", options); if (optionString.length() != 0) { 
setMaxIterations(Integer.parseInt(optionString)); } String distFunctionClass = Utils.getOption('A', options); if (distFunctionClass.length() != 0) { String distFunctionClassSpec[] = Utils.splitOptions(distFunctionClass); if (distFunctionClassSpec.length == 0) { throw new Exception("Invalid DistanceFunction specification string."); } String className = distFunctionClassSpec[0]; distFunctionClassSpec[0] = ""; setDistanceFunction( (DistanceFunction) Utils.forName(DistanceFunction.class, className, distFunctionClassSpec)); } else { setDistanceFunction(new EuclideanDistance()); } m_PreserveOrder = Utils.getFlag("O", options); m_FastDistanceCalc = Utils.getFlag("fast", options); String slotsS = Utils.getOption("num-slots", options); if (slotsS.length() > 0) { setNumExecutionSlots(Integer.parseInt(slotsS)); } super.setOptions(options); Utils.checkForRemainingOptions(options); }