List of usage examples for weka.core Instances classIndex
public int classIndex()
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * <p>Clamps the random-feature count K into a valid range, validates the data
 * against the classifier's capabilities, strips instances with a missing
 * class, and falls back to a ZeroR model when only the class attribute is
 * present. Otherwise it optionally splits the data into a training fold and a
 * backfit fold, computes the initial weighted class counts, and builds the
 * tree from the supplied JSON tree description.</p>
 *
 * @param data the data to train with
 * @throws Exception if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Make sure the K value is in range.
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;
    // Can the classifier handle the data?
    getCapabilities().testWithFail(data);
    // Work on a copy and remove instances with a missing class.
    data = new Instances(data);
    data.deleteWithMissingClass();
    // Only the class attribute left? -> build a ZeroR model instead.
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }
    // Figure out the appropriate datasets: with m_NumFolds <= 0 train on
    // everything, otherwise train on one stratified CV split and keep its
    // complement for backfitting.
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }
    // Set default instances for selection.
    setRequiredInst(data);
    // Create the attribute indices window (every attribute except the class).
    // NOTE(review): attIndicesWindow is computed but never passed to
    // buildTree below — possibly vestigial; confirm before removing.
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }
    // Compute initial class counts, weighting each instance.
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }
    Instances requiredInstances = getRequiredInst();
    // Build the tree from the JSON description; without one there is
    // nothing to build.
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0, m_distributionData,
                requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);
    // Backfit if required.
    if (backfit != null) {
        backfitData(backfit);
    }
}
From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.MissingValueHandler.java
License:Apache License
/** * Method to replace the identified missing values. * * @throws Exception the exception/*from w w w . j a va 2 s .c o m*/ */ public void replaceMissingValues() throws Exception { this.confirmationMessage = new ArrayList<String>(); Instances outputData; String inputFile = BASE_DIR + "OriginalDataSet.csv"; // load CSV file CSVLoader fileLoader = new CSVLoader(); fileLoader.setSource(new File(inputFile)); outputData = fileLoader.getDataSet(); int numInstances = outputData.numInstances(); int numAttributes = outputData.numAttributes(); final int NON_NUMERIC = -1; int[] m_AttributeIndices = null; Range m_Attributes = new Range("first-last"); // attributes must be numeric m_Attributes.setUpper(outputData.numAttributes() - 1); m_AttributeIndices = m_Attributes.getSelection(); for (int i = 0; i < m_AttributeIndices.length; i++) { // ignore class if (m_AttributeIndices[i] == outputData.classIndex()) { m_AttributeIndices[i] = NON_NUMERIC; continue; } // not numeric -> ignore it if (!outputData.attribute(m_AttributeIndices[i]).isNumeric()) m_AttributeIndices[i] = NON_NUMERIC; } double sum; int missingCounter; double attributeMean; // identify the missing values for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) { // non-numeric attribute? 
if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) { continue; } double tempArr[] = outputData.attributeToDoubleArray(attributeIndex); sum = 0; missingCounter = 0; for (int i = 0; i < tempArr.length; i++) { sum = sum + tempArr[i]; if (tempArr[i] == 0) missingCounter++; } attributeMean = sum / (numInstances - missingCounter); for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) { // replace the missing values with attribute mean values if (outputData.instance(instanceIndex).value(attributeIndex) == 0) { outputData.instance(instanceIndex).setValue(attributeIndex, attributeMean); } } } outputData.deleteAttributeAt(outputData.numAttributes() - 1); outputData.deleteAttributeAt(outputData.numAttributes() - 1); saveFilledData(inputFile, outputData); }
From source file:org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java
License:Apache License
/**
 * Constructor to instantiate a new FeatureEvaluator object.
 *
 * <p>Writes the "unprocessed_data" rows from the given JSON string to a CSV
 * file, loads that file as a Weka data set, ranks every non-class attribute
 * with five evaluators (information gain, gain ratio, chi-squared,
 * symmetrical uncertainty, significance), min-max scales each ranking, sums
 * them into a combined rank, and fills the featureTitles / featureScores /
 * featureWeights / featurePriorities lists in descending combined-rank
 * order.</p>
 *
 * <p>NOTE(review): the {@code data} parameter is immediately overwritten by
 * the data set loaded from the generated CSV, so the caller's instances are
 * never used — confirm whether the parameter can be dropped.</p>
 *
 * @param json the data string containing an "unprocessed_data" JSON array
 * @param data the data set (currently ignored; see note above)
 * @throws Exception if the CSV cannot be written/loaded or evaluation fails
 */
public FeatureEvaluator(String json, Instances data) throws Exception {
    this.featureTitles = new ArrayList<String>();
    this.featureScores = new ArrayList<Double>();
    this.featureWeights = new ArrayList<Double>();
    this.featurePriorities = new ArrayList<Double>();
    // Extract the raw CSV rows from the JSON payload; each array element is
    // a quoted row, so the surrounding quotes are stripped.
    OrderedJSONObject jsonObject = new OrderedJSONObject(json.toString());
    JSONArray jsontokenArray = jsonObject.getJSONArray("unprocessed_data");
    String csvString = "";
    String str;
    for (int i = 0; i < jsontokenArray.length(); i++) {
        str = jsontokenArray.get(i).toString();
        str = str.substring(1, str.length() - 1);
        csvString += str + "\n";
    }
    String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv";
    File file = new File(filePath);
    // if the file does not exist, then create it
    if (!file.exists())
        file.createNewFile();
    FileUtils.writeStringToFile(file, csvString);
    // Reload the rows as a Weka data set; the class defaults to the last
    // attribute when the format does not specify one.
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(filePath));
    data = loader.getDataSet();
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);
    int numUnlabeledAttributes = data.numAttributes() - 1;
    double[] minmaxValues = new double[2];
    double min, max;
    // Ranker options: -T threshold (keep everything), -N -1 (no cap on the
    // number of attributes retained).
    String[] options = new String[1];
    options[0] = "-T -1.7976931348623157E308 -N -1";
    Ranker atrank = new Ranker();
    atrank.setOptions(options);
    weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection();
    // Information Gain attribute evaluator: rank (rounded to 4 decimals),
    // then min-max scale into [0, 1].
    InfoGainAttributeEval infoGainAttrEval = new InfoGainAttributeEval();
    atsel.setEvaluator(infoGainAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] infoGainRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        infoGainRanks[i] = Math.round(10000 * infoGainAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(infoGainRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledInfoGainRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledInfoGainRanks[i] = Math.round(10000 * ((infoGainRanks[i] - min) / (max - min))) / 10000d;
    }
    // Gain Ratio attribute evaluator (same rank/scale treatment).
    GainRatioAttributeEval gainRatioAttrEval = new GainRatioAttributeEval();
    atsel.setEvaluator(gainRatioAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] gainRatioRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        gainRatioRanks[i] = Math.round(10000 * gainRatioAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(gainRatioRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledGainRatioRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledGainRatioRanks[i] = Math.round(10000 * ((gainRatioRanks[i] - min) / (max - min))) / 10000d;
    }
    // Chi Squared attribute evaluator (same rank/scale treatment).
    ChiSquaredAttributeEval chiSquaredAttrEval = new ChiSquaredAttributeEval();
    atsel.setEvaluator(chiSquaredAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] chiSquaredRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        chiSquaredRanks[i] = Math.round(10000 * chiSquaredAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(chiSquaredRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledChiSquaredRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledChiSquaredRanks[i] = Math.round(10000 * ((chiSquaredRanks[i] - min) / (max - min))) / 10000d;
    }
    // Symmetrical Uncertainty attribute evaluator (same treatment).
    SymmetricalUncertAttributeEval symmetricalUncertAttrEval = new SymmetricalUncertAttributeEval();
    atsel.setEvaluator(symmetricalUncertAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] symmetricalUncertRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        symmetricalUncertRanks[i] = Math.round(10000 * symmetricalUncertAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(symmetricalUncertRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledSymmetricalUncertRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledSymmetricalUncertRanks[i] = Math.round(10000 * ((symmetricalUncertRanks[i] - min) / (max - min)))
                / 10000d;
    }
    // Significance attribute evaluator (same treatment).
    SignificanceAttributeEval significanceAttrEval = new SignificanceAttributeEval();
    atsel.setEvaluator(significanceAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] significanceRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        significanceRanks[i] = Math.round(10000 * significanceAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(significanceRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledSignificanceRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledSignificanceRanks[i] = Math.round(10000 * ((significanceRanks[i] - min) / (max - min))) / 10000d;
    }
    // Combine the five scaled rankings per attribute and total them.
    double attributeSum;
    double[] combinedRanks = new double[numUnlabeledAttributes];
    double combinedranksSum = 0;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i]
                + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i];
        combinedRanks[i] = Math.round(10000 * attributeSum) / 10000d;
        combinedranksSum = combinedranksSum + combinedRanks[i];
    }
    // Remember each attribute's original index alongside its combined rank
    // so titles can be recovered after sorting.
    double[][] tempArray = new double[numUnlabeledAttributes][2];
    String[] attributesTitles = new String[numUnlabeledAttributes];
    double[] attributesScores = new double[numUnlabeledAttributes];
    double[] attributesWeights = new double[numUnlabeledAttributes];
    double[] attributesPriorities = new double[numUnlabeledAttributes];
    for (int j = 0; j < numUnlabeledAttributes; j++) {
        tempArray[j][0] = j;
        tempArray[j][1] = combinedRanks[j];
    }
    // Bubble-sort combinedRanks into descending order.
    double temp;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        for (int j = 1; j < (numUnlabeledAttributes - i); j++) {
            if (combinedRanks[j - 1] < combinedRanks[j]) {
                // swap the elements!
                temp = combinedRanks[j - 1];
                combinedRanks[j - 1] = combinedRanks[j];
                combinedRanks[j] = temp;
            }
        }
    }
    // Map each sorted rank back to its attribute title via value matching.
    // NOTE(review): ties in combinedRanks always match the first attribute
    // with that value, so duplicate scores would repeat the same title —
    // confirm whether ranks are guaranteed distinct.
    for (int j = 0; j < numUnlabeledAttributes; j++) {
        for (int k = 0; k < numUnlabeledAttributes; k++) {
            if (combinedRanks[j] == tempArray[k][1]) {
                attributesTitles[j] = data.attribute((int) tempArray[k][0]).toString();
                // The Weka attribute string is "@attribute <name> ..."; the
                // second whitespace-separated token is the name.
                String res[] = attributesTitles[j].split("\\s+");
                attributesTitles[j] = res[1];
                this.featureTitles.add(attributesTitles[j]);
                break;
            }
        }
        // Score as a percentage of the theoretical maximum combined rank;
        // NOTE(review): the divisor 9 looks like a hard-coded maximum even
        // though five evaluators cap the sum at 5 — confirm intent.
        attributesScores[j] = Math.round(10000 * (combinedRanks[j] / 9)) / 100d;
        attributesWeights[j] = Math.round(10000 * (combinedRanks[j] / combinedranksSum)) / 100d;
        attributesPriorities[j] = Math.round(attributesScores[j] * attributesWeights[j]) / 100d;
        this.featureScores.add(attributesScores[j]);
        this.featureWeights.add(attributesWeights[j]);
        this.featurePriorities.add(attributesPriorities[j]);
        System.out.println(attributesTitles[j] + " is " + attributesScores[j] + " % Important");
    }
}
From source file:org.uclab.mm.kcl.ddkat.modellearner.ModelLearner.java
License:Apache License
/** * Method to compute the classification accuracy. * * @param algo the algorithm name/*from ww w . j a v a 2 s . c o m*/ * @param data the data instances * @param datanature the dataset nature (i.e. original or processed data) * @throws Exception the exception */ protected String[] modelAccuracy(String algo, Instances data, String datanature) throws Exception { String modelResultSet[] = new String[4]; String modelStr = ""; Classifier classifier = null; // setting class attribute if the data format does not provide this information if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); String decisionAttribute = data.attribute(data.numAttributes() - 1).toString(); String res[] = decisionAttribute.split("\\s+"); decisionAttribute = res[1]; if (algo.equals("BFTree")) { // Use BFTree classifiers BFTree BFTreeclassifier = new BFTree(); BFTreeclassifier.buildClassifier(data); modelStr = BFTreeclassifier.toString(); classifier = BFTreeclassifier; } else if (algo.equals("FT")) { // Use FT classifiers FT FTclassifier = new FT(); FTclassifier.buildClassifier(data); modelStr = FTclassifier.toString(); classifier = FTclassifier; } else if (algo.equals("J48")) { // Use J48 classifiers J48 J48classifier = new J48(); J48classifier.buildClassifier(data); modelStr = J48classifier.toString(); classifier = J48classifier; System.out.println("Model String: " + modelStr); } else if (algo.equals("J48graft")) { // Use J48graft classifiers J48graft J48graftclassifier = new J48graft(); J48graftclassifier.buildClassifier(data); modelStr = J48graftclassifier.toString(); classifier = J48graftclassifier; } else if (algo.equals("RandomTree")) { // Use RandomTree classifiers RandomTree RandomTreeclassifier = new RandomTree(); RandomTreeclassifier.buildClassifier(data); modelStr = RandomTreeclassifier.toString(); classifier = RandomTreeclassifier; } else if (algo.equals("REPTree")) { // Use REPTree classifiers REPTree REPTreeclassifier = new REPTree(); 
REPTreeclassifier.buildClassifier(data); modelStr = REPTreeclassifier.toString(); classifier = REPTreeclassifier; } else if (algo.equals("SimpleCart")) { // Use SimpleCart classifiers SimpleCart SimpleCartclassifier = new SimpleCart(); SimpleCartclassifier.buildClassifier(data); modelStr = SimpleCartclassifier.toString(); classifier = SimpleCartclassifier; } modelResultSet[0] = algo; modelResultSet[1] = decisionAttribute; modelResultSet[2] = modelStr; // Collect every group of predictions for J48 model in a FastVector FastVector predictions = new FastVector(); Evaluation evaluation = new Evaluation(data); int folds = 10; // cross fold validation = 10 evaluation.crossValidateModel(classifier, data, folds, new Random(1)); // System.out.println("Evaluatuion"+evaluation.toSummaryString()); System.out.println("\n\n" + datanature + " Evaluatuion " + evaluation.toMatrixString()); // ArrayList<Prediction> predictions = evaluation.predictions(); predictions.appendElements(evaluation.predictions()); System.out.println("\n\n 11111"); // Calculate overall accuracy of current classifier on all splits double correct = 0; for (int i = 0; i < predictions.size(); i++) { NominalPrediction np = (NominalPrediction) predictions.elementAt(i); if (np.predicted() == np.actual()) { correct++; } } System.out.println("\n\n 22222"); double accuracy = 100 * correct / predictions.size(); String accString = String.format("%.2f%%", accuracy); modelResultSet[3] = accString; System.out.println(datanature + " Accuracy " + accString); String modelFileName = algo + "-DDKA.model"; System.out.println("\n\n 33333"); ObjectOutputStream oos = new ObjectOutputStream( new FileOutputStream("D:\\DDKAResources\\" + modelFileName)); oos.writeObject(classifier); oos.flush(); oos.close(); return modelResultSet; }
From source file:org.wkwk.classifier.Access.java
/** * @param args the command line arguments * args[0] = filename train set// w w w . ja va2 s . c o m * args[1] = filename test set * args[2] = remove attribute * args[3] = bias resample * @throws java.lang.Exception */ public static void main(String[] args) throws Exception { // Read Dataset (arff, csv) DataSource source = new DataSource("../data/weather.nominal.arff"); //DataSource testSource = new DataSource(args[1]); Instances data = source.getDataSet(); if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } // Remove attr // String[] rangeOps = new String[2]; // rangeOps[0] = "-R"; // "range" // rangeOps[1] = args[2]; // first attribute // Remove remove = new Remove(); // new instance of filter // remove.setOptions(rangeOps); // set options // remove.setInputFormat(data); // inform filter about dataset **AFTER** setting options // Instances newData = Filter.useFilter(data, remove); // apply filter // // // Filter Resample // String[] biasOps = new String[2]; // biasOps[0] = "-B"; // "range" // biasOps[1] = args[3]; // first attribute // Resample resample = new Resample(); // resample.setOptions(biasOps); // resample.setInputFormat(data); // newData = Filter.useFilter(data, resample); // // Build Classifier MyC45 tree = new MyC45(); // new instance of tree tree.buildClassifier(data); // build classifier // Evaluation with test set //Instances testSet = testSource.getDataSet(); // train classifier //Classifier cls = new MyId3(); //cls.buildClassifier(data); // evaluate classifier and print some statistics //Evaluation eval = new Evaluation(data); //eval.evaluateModel(cls, testSet); //System.out.println(eval.toSummaryString("\nResults\n======\n", false)); // Evaluation with 10 Fold-CV Evaluation evalCV = new Evaluation(data); evalCV.crossValidateModel(tree, data, 10, new Random(1)); System.out.println(evalCV.toSummaryString("\nResults\n======\n", false)); }
From source file:PEBL.TwoStep.java
public static void main(String[] args) throws Exception { ConverterUtils.DataSource source = new ConverterUtils.DataSource( "Z:\\\\shared from vm\\\\fourthset\\\\mixed.csv"); Instances data = source.getDataSet(); // setting class attribute if the data format does not provide this information // For example, the XRFF format saves the class attribute information as well if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); }//from w ww. jav a2s . c o m NumericToNominal nmf = new NumericToNominal(); nmf.setInputFormat(data); data = Filter.useFilter(data, nmf); // build a c4.5 classifier String[] options = new String[1]; // options[0] = "-C 0.25 -M 2"; // unpruned tree options[0] = "-K"; NaiveBayes c = new NaiveBayes(); // new instance of tree c.setOptions(options); // set the options c.buildClassifier(data); // build classifier // eval Evaluation eval = new Evaluation(data); eval.crossValidateModel(c, data, 10, new Random(1)); System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); System.out.println(eval.toClassDetailsString()); System.out.println("--- model learned on mixed set ---"); // load unlabeled data ConverterUtils.DataSource s = new ConverterUtils.DataSource( "Z:\\\\shared from vm\\\\fourthset\\\\unlabelled.csv"); Instances unlabeled = s.getDataSet(); // set class attribute unlabeled.setClassIndex(unlabeled.numAttributes() - 1); nmf = new NumericToNominal(); nmf.setInputFormat(unlabeled); unlabeled = Filter.useFilter(unlabeled, nmf); // label instances for (int i = 0; i < unlabeled.numInstances(); i++) { double classZero = c.distributionForInstance(unlabeled.instance(i))[0]; double classOne = c.distributionForInstance(unlabeled.instance(i))[1]; System.out.print( "classifying: " + unlabeled.instance(i) + " : " + classZero + " - " + classOne + " == class: "); if (classZero > classOne) { System.out.print("0"); unlabeled.instance(i).setClassValue("0"); } else { System.out.print("1"); 
unlabeled.instance(i).setClassValue("1"); } System.out.println(""); } // save labeled data // BufferedWriter writer = new BufferedWriter( // new FileWriter("Z:\\\\shared from vm\\\\thirdset\\\\relabelled.arff")); // writer.write(labeled.toString()); // writer.newLine(); // writer.flush(); // writer.close(); ArffSaver saver = new ArffSaver(); saver.setInstances(unlabeled); saver.setFile(new File("Z:\\shared from vm\\thirdset\\relabelled.arff")); // saver.setDestination(new File("Z:\\shared from vm\\thirdset\\relabelled.arff")); // **not** necessary in 3.5.4 and later saver.writeBatch(); }
From source file:pl.nask.hsn2.service.analysis.JSWekaAnalyzer.java
License:Open Source License
/**
 * Loads an ARFF file and installs it as the shared training set, defaulting
 * the class attribute to the last column when the file does not specify one.
 * Any failure is logged rather than propagated.
 *
 * @param arffFilePath path to the ARFF file containing the training data
 */
private void createTrainingSet(String arffFilePath) {
    try {
        ConverterUtils.DataSource dataSource = new ConverterUtils.DataSource(arffFilePath);
        Instances loaded = dataSource.getDataSet();
        boolean classUnset = loaded.classIndex() == -1;
        if (classUnset) {
            loaded.setClassIndex(loaded.numAttributes() - 1);
        }
        JSWekaAnalyzer.trainingSet = loaded;
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
    }
}
From source file:predictors.HelixIndexer.java
License:Open Source License
/**
 * Trains the Weka classifier: a RandomForest (100 trees, 9 random features
 * per split, 3 worker threads) built on this object's dataset, then marks
 * the predictor as trained. Failures are reported via ErrorUtils instead of
 * being thrown.
 */
public void trainClassifier() {
    try {
        Instances trainingData = this.dataset;
        // Default the class attribute to the last column when unset.
        if (trainingData.classIndex() == -1) {
            trainingData.setClassIndex(trainingData.numAttributes() - 1);
        }
        RandomForest forest = new weka.classifiers.trees.RandomForest();
        forest.setOptions(weka.core.Utils.splitOptions("-I 100 -K 9 -S 1 -num-slots 3"));
        forest.setSeed(trainingData.size());
        // Shuffle before training; the dataset size doubles as the seed.
        trainingData.randomize(new Random(trainingData.size()));
        forest.buildClassifier(trainingData);
        this.classifier = forest;
        this.isTrained = true;
    } catch (Exception e) {
        ErrorUtils.printError(HelixIndexer.class, "Training failed", e);
    }
}
From source file:predictors.HelixPredictor.java
License:Open Source License
/** * Trains the Weka Classifer./* w w w. j a v a2s . c o m*/ */ public void trainClassifier() { try { MultilayerPerceptron classifier = new weka.classifiers.functions.MultilayerPerceptron(); Instances data = this.dataset; if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } data.randomize(new Random(data.size())); String[] optClassifier = weka.core.Utils .splitOptions("-L 0.01 -M 0.8 -N 256 -V 20 -S 0 -E 5 -H 25 -B -I -D -C"); classifier.setOptions(optClassifier); classifier.setSeed(data.size()); classifier.buildClassifier(data); this.classifier = classifier; this.isTrained = true; } catch (Exception e) { ErrorUtils.printError(HelixPredictor.class, "Training failed", e); } }
From source file:predictors.TopologyPredictor.java
License:Open Source License
/**
 * Trains the Weka classifier: a RandomForest (100 trees, 7 random features
 * per split, single worker thread) built on this object's dataset, then
 * marks the predictor as trained. Failures are reported via ErrorUtils
 * instead of being thrown.
 */
public void trainClassifier() {
    try {
        Instances examples = this.dataset;
        // Default the class attribute to the last column when unset.
        boolean needsClassIndex = examples.classIndex() == -1;
        if (needsClassIndex) {
            examples.setClassIndex(examples.numAttributes() - 1);
        }
        // Shuffle before training; the dataset size doubles as the seed.
        examples.randomize(new Random(examples.size()));
        RandomForest randomForest = new weka.classifiers.trees.RandomForest();
        String[] forestOptions = weka.core.Utils.splitOptions("-I 100 -K 7 -S 1 -num-slots 1");
        randomForest.setOptions(forestOptions);
        randomForest.setSeed(examples.size());
        randomForest.buildClassifier(examples);
        this.classifier = randomForest;
        this.isTrained = true;
    } catch (Exception e) {
        ErrorUtils.printError(TopologyPredictor.class, "Training failed", e);
    }
}