List of usage examples for weka.core Instances classIndex
publicint classIndex()
From source file:app.RunApp.java
License:Open Source License
/**
 * Converts the loaded multi-label dataset into one or more multi-class or
 * binary datasets, according to the transformation radio button that is
 * currently selected, and stores the results in {@code transformedDatasets}.
 *
 * @return 1 if the transformation succeeded, -1 if no dataset is loaded or
 *         the transformation failed
 */
private int transform() {
    if (dataset == null) {
        JOptionPane.showMessageDialog(null, "You must load a dataset.", "alert", JOptionPane.ERROR_MESSAGE);
        return -1;
    }

    transformedDatasets.clear();

    if (radioBRTrans.isSelected()) {
        // Binary relevance: one binary dataset per label.
        BinaryRelevanceTransformation binaryRelevance = new BinaryRelevanceTransformation(dataset);

        for (int label = 0; label < dataset.getNumLabels(); label++) {
            try {
                LabelsMetaDataImpl singleLabelMeta = (LabelsMetaDataImpl) dataset.getLabelsMetaData().clone();
                for (int other = 0; other < dataset.getNumLabels(); other++) {
                    if (other == label) {
                        continue;
                    }
                    singleLabelMeta.removeLabelNode(dataset.getLabelNames()[other]);
                }

                Instances binaryData = binaryRelevance.transformInstances(label);
                // Name the class attribute after the label it represents.
                binaryData.renameAttribute(binaryData.classIndex(), dataset.getLabelNames()[label]);
                transformedDatasets.add(binaryData);
            } catch (Exception ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
                return -1;
            }
        }
    } else if (radioLPTrans.isSelected()) {
        // Label powerset: a single multi-class dataset.
        try {
            LabelPowersetTransformation labelPowerset = new LabelPowersetTransformation();
            transformedDatasets.add(labelPowerset.transformInstances(dataset));
        } catch (Exception ex) {
            Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            return -1;
        }
    } else if (radioRemoveLabelsTrans.isSelected()) {
        try {
            transformedDatasets.add(RemoveAllLabels.transformInstances(dataset));
        } catch (Exception ex) {
            Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            return -1;
        }
    } else if (radioIncludeLabelsTrans.isSelected()) {
        try {
            IncludeLabelsTransformation includeLabels = new IncludeLabelsTransformation();
            transformedDatasets.add(includeLabels.transformInstances(dataset));
        } catch (Exception ex) {
            Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            return -1;
        }
    }

    jButtonSaveDatasetsTrans.setEnabled(true);
    return 1;
}
From source file:asap.PostProcess.java
public void loadTrainingDataStream(PreProcessOutputStream pposTrainingData) { Instances instancesTrainingSet; DataSource source = new DataSource(pposTrainingData); try {/*from w ww . j av a 2s.co m*/ instancesTrainingSet = source.getDataSet(); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); return; } // setting class attribute if the data format does not provide this information if (instancesTrainingSet.classIndex() == -1) { instancesTrainingSet.setClass(instancesTrainingSet.attribute("gold_standard")); } for (String wekaModelsCmd : Config.getWekaModelsCmd()) { String[] classifierCmd; try { classifierCmd = Utils.splitOptions(wekaModelsCmd); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); continue; } String classname = classifierCmd[0]; classifierCmd[0] = ""; try { AbstractClassifier cl = (AbstractClassifier) Utils.forName(Classifier.class, classname, classifierCmd); // String modelName = String.format("%s%s%s%s.model", modelDirectory, File.separatorChar, i, classname); // System.out.println(String.format("\tBuilding model %s (%s) and doing cross-validation...", i++, modelName)); // System.out.println(CrossValidation.performCrossValidationMT(trainSet, cl, Config.getCrossValidationSeed(), Config.getCrossValidationFolds(), modelName)); systems.add(new NLPSystem(cl, instancesTrainingSet, null)); System.out.println("\tAdded system " + systems.get(systems.size() - 1).shortName()); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:asap.PostProcess.java
public void loadEvaluationDataStream(PreProcessOutputStream pposEvaluationData) { Instances instancesEvaluationSet; DataSource source = new DataSource(pposEvaluationData); try {//from ww w .ja v a 2s .c o m instancesEvaluationSet = source.getDataSet(); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); return; } // setting class attribute if the data format does not provide this information if (instancesEvaluationSet.classIndex() == -1) { instancesEvaluationSet.setClass(instancesEvaluationSet.attribute("gold_standard")); } for (NLPSystem system : systems) { system.setEvaluationSet(instancesEvaluationSet); } }
From source file:binarizer.LayoutAnalysis.java
/**
 * Runs a 10-fold cross-validation of a Naive Bayes classifier (with
 * supervised discretization enabled) on the given ARFF file, prints the
 * evaluation summary to standard output and returns the error rate.
 *
 * @param arffFile location of the dataset to load
 * @return the error rate reported by the evaluation
 * @throws Exception if the data cannot be loaded or the evaluation fails
 */
public double crossValidation(String arffFile) throws Exception {
    Instances trainingData = new DataSource(arffFile).getDataSet();

    // Fall back to the last attribute when the file declares no class.
    if (trainingData.classIndex() == -1) {
        int lastAttribute = trainingData.numAttributes() - 1;
        trainingData.setClassIndex(lastAttribute);
    }

    NaiveBayes learner = new NaiveBayes();
    learner.setUseSupervisedDiscretization(true);

    Evaluation result = new Evaluation(trainingData);
    Random foldRandom = new Random(1);
    result.crossValidateModel(learner, trainingData, 10, foldRandom);

    System.out.println(result.toSummaryString());
    return result.errorRate();
}
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Prints the header for the predictions output into a supplied StringBuffer * /*from w w w . ja v a 2 s. c om*/ * @param test structure of the test set to print predictions for * @param attributesToOutput indices of the attributes to output * @param printDistribution prints the complete distribution for nominal * attributes, not just the predicted value * @param text the StringBuffer to print to */ protected static void printClassificationsHeader(Instances test, Range attributesToOutput, boolean printDistribution, StringBuffer text) { // print header if (test.classAttribute().isNominal()) { if (printDistribution) { text.append(" inst# actual predicted error distribution"); } else { text.append(" inst# actual predicted error prediction"); } } else { text.append(" inst# actual predicted error"); } if (attributesToOutput != null) { attributesToOutput.setUpper(test.numAttributes() - 1); text.append(" ("); boolean first = true; for (int i = 0; i < test.numAttributes(); i++) { if (i == test.classIndex()) { continue; } if (attributesToOutput.isInRange(i)) { if (!first) { text.append(","); } text.append(test.attribute(i).name()); first = false; } } text.append(")"); } text.append("\n"); }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Prints the predictions for the given dataset into a supplied StringBuffer * /*from w ww . j a v a2 s.co m*/ * @param classifier the classifier to use * @param train the training data * @param testSource the test set * @param classIndex the class index (1-based), if -1 ot does not override the * class index is stored in the data file (by using the last * attribute) * @param attributesToOutput the indices of the attributes to output * @param printDistribution prints the complete distribution for nominal * classes, not just the predicted value * @param text StringBuffer to hold the printed predictions * @throws Exception if test file cannot be opened */ public static void printClassifications(Classifier classifier, Instances train, DataSource testSource, int classIndex, Range attributesToOutput, boolean printDistribution, StringBuffer text) throws Exception { if (testSource != null) { Instances test = testSource.getStructure(); if (classIndex != -1) { test.setClassIndex(classIndex - 1); } else { if (test.classIndex() == -1) { test.setClassIndex(test.numAttributes() - 1); } } // print the header printClassificationsHeader(test, attributesToOutput, printDistribution, text); // print predictions int i = 0; testSource.reset(); test = testSource.getStructure(test.classIndex()); while (testSource.hasMoreElements(test)) { Instance inst = testSource.nextElement(test); text.append(predictionText(classifier, inst, i, attributesToOutput, printDistribution)); i++; } } // return text.toString(); }
From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java
License:Open Source License
/**
 * Method used to pre-process the data, perform clustering, and
 * set the initial parameter vector.
 *
 * <p>The steps, in order: copy the data and drop instances with a missing
 * class, shuffle, replace missing values, remove useless attributes, convert
 * nominal attributes to binary, normalize, run k-means on the data without
 * the class attribute, size the parameter array {@code m_RBFParameters}, and
 * fill in initial scales, centers and (optionally) attribute weights.
 *
 * @param data the training data; it is copied, the argument is not modified
 * @return the filtered data (after only the first two filters if the ZeroR
 *         fallback is taken)
 * @throws Exception if the capabilities test fails, if all class values are
 *           equal, or if a filter or the clusterer fails
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Work on a copy so the caller's dataset is left untouched.
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    // Find two instances (index 0 and `index`) with different class values.
    // Their values before (y0, y1) and after (z0, z1) filtering are used
    // further down to compute m_x0 and m_x1.
    double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);

    // m_x1 and m_x0 satisfy y = m_x0 + m_x1 * z for both reference instances.
    // NOTE(review): presumably used elsewhere to map filtered class values
    // back to the original scale - confirm against the prediction code.
    double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    double z1 = data.instance(index).classValue();
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    Remove rm = new Remove();
    // The class index is cleared while the Remove filter strips the class
    // attribute (1-based index string), then restored on `data`.
    data.setClassIndex(-1);
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    // The clusterer may return fewer centroids than requested; shrink the
    // number of units to match.
    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up arrays
    // Layout of m_RBFParameters: starting at OFFSET_WEIGHTS a region of
    // (m_numUnits + 1) * m_numClasses entries, then optional attribute
    // weights, then centers, then scales.
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    // Number of scale entries: one global, one per unit and attribute, or
    // (default) one per unit.
    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function
    // For each center, minDist is the smallest squared distance to any
    // later center (j > i); maxMinDist is the largest of those minima.
    // NOTE(review): with a single center the inner loop never runs, so
    // maxMinDist stays -1 and Math.sqrt(maxMinDist) below is NaN - confirm
    // this case is handled downstream.
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize parameters
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        // j indexes attributes of `data` (class included); k indexes the
        // attributes of `centers`, which were clustered without the class.
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }

    // Every non-class attribute starts with weight 1.0.
    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    initializeOutputLayer(random);

    return data;
}
From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all attributes that are * numeric.//from w w w . j a va 2 s . c o m * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numAttributes = data.numAttributes(); m_DFs = new int[numAttributes]; Enumeration e = data.enumerateInstances(); while (e.hasMoreElements()) { Instance instance = (Instance) e.nextElement(); int numValues = instance.numValues(); for (int valueIndex = 0; valueIndex < numValues; valueIndex++) { int attIndex = instance.index(valueIndex); if (attIndex != classIndex) { double value = instance.valueSparse(valueIndex); //missingvalues werden also 0 betrachtet. if (m_missingAsZero) { if (!Instance.isMissingValue(value) && value != 0.0) { //man knnte auch isMissingSparce(valueIndex) verwenden, oder ineffizienterweise isMissing(attIndex) m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } else { if (value != 0.0) { m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } } } } }
From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java
License:Apache License
/**
 * Trains the selected classifier on the given dataset, optionally
 * cross-validates it, and serializes the trained model to the given stream.
 *
 * <p>For {@code KNN} the neighbourhood size is the ceiling of the square
 * root of the number of instances. When cross-validation runs, the
 * {@code kappa}, {@code fMeasure} and {@code confusionMatrix} fields are
 * updated. The model stream is closed once the model has been written, also
 * when writing fails.
 *
 * @param classifier which classifier to train
 * @param trainingDataset reader over the ARFF training data
 * @param trainingModel stream the trained model is serialized to
 * @param crossValidationFoldNumber number of folds; cross-validation is
 *          skipped when this is null or not positive
 * @throws Exception if the data cannot be read, or training, evaluation or
 *           serialization fails
 */
public void trainClassifier(Classifier classifier, FileReader trainingDataset, FileOutputStream trainingModel,
        Integer crossValidationFoldNumber) throws Exception {
    Instances instances = new Instances(new BufferedReader(trainingDataset));

    switch (classifier) {
    case KNN:
        int k = (int) Math.ceil(Math.sqrt(instances.numInstances()));
        this.classifier = new IBk(k);
        break;
    case NB:
        this.classifier = new NaiveBayes();
        break;
    }

    // Fall back to the last attribute when the data declares no class.
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    this.classifier.buildClassifier(instances);

    // Null check avoids an unboxing NullPointerException on the Integer parameter.
    if (crossValidationFoldNumber != null && crossValidationFoldNumber > 0) {
        Evaluation evaluation = new Evaluation(instances);
        evaluation.crossValidateModel(this.classifier, instances, crossValidationFoldNumber, new Random(1));
        kappa = evaluation.kappa();
        fMeasure = evaluation.weightedFMeasure();
        confusionMatrix = evaluation.toMatrixString("Confusion matrix: ");
    }

    // try-with-resources closes the stream even if serialization throws.
    try (ObjectOutputStream outputStream = new ObjectOutputStream(trainingModel)) {
        outputStream.writeObject(this.classifier);
        outputStream.flush();
    }
}
From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java
License:Apache License
public Map<String, String> makePrediction(String username, FileInputStream trainingModel, FileReader testingDataset) throws Exception { Map<String, String> predictions = new HashMap<>(); ObjectInputStream inputStream = new ObjectInputStream(trainingModel); weka.classifiers.Classifier classifier = (weka.classifiers.Classifier) inputStream.readObject(); inputStream.close();//from w w w.jav a2 s .c om Instances instances = new Instances(new BufferedReader(testingDataset)); if (instances.classIndex() == -1) { instances.setClassIndex(instances.numAttributes() - 1); } int last = instances.numInstances() - 1; if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) { double label = classifier.classifyInstance(instances.instance(last)); instances.instance(last).setClassValue(label); predictions.put(username, instances.instance(last).stringValue(instances.classIndex())); } return predictions; }