List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:entities.WekaBOWFeatureVector.java
public Instance fillFeatureVector(BOWFeatureVector vSource, Instances data) { double[] values = new double[data.numAttributes()]; values[0] = vSource.getCosSimilarityArrayAtIndex(0);//((vSource.getCosSimilarityArrayAtIndex(0) + vSource.getCosSimilarityArrayAtIndex(1))); values[1] = vSource.getCosSimilarityArrayAtIndex(1);//((vSource.getCosSimilarityArrayAtIndex(0) + vSource.getCosSimilarityArrayAtIndex(1))); values[2] = data.attribute(2).indexOfValue(vSource.getLabel()); Instance inst = new DenseInstance(1.0, values); return inst;/*from www . j a v a2s . c om*/ }
From source file:entities.WekaHMMFeatureVector.java
/**
 * Converts an HMMFeatureVector into a weka DenseInstance laid out for {@code data}.
 * Slots 0-1 carry the two HMM probabilities normalized so they sum to 1; the last
 * slot carries the class label encoded as its index within nominal attribute 2.
 *
 * FIX: removed the unused local {@code double min_prob} from the original.
 *
 * @param vSource source HMM feature vector
 * @param data    dataset whose attribute layout (and nominal label values) is used
 * @return a DenseInstance with weight 1.0, not yet attached to {@code data}
 */
public Instance fillFeatureVector(HMMFeatureVector vSource, Instances data) {
    double[] values = new double[data.numAttributes()];
    // Normalize the two raw probabilities to a proper two-way distribution.
    double total = vSource.getProbArrayAtIndex(0) + vSource.getProbArrayAtIndex(1);
    values[0] = vSource.getProbArrayAtIndex(0) / total;
    values[1] = vSource.getProbArrayAtIndex(1) / total;
    // Nominal class value is stored as the label's index in attribute 2's value list.
    values[2] = data.attribute(2).indexOfValue(vSource.getLabel());
    return new DenseInstance(1.0, values);
}
From source file:entities.WekaNGGFeatureVector.java
/**
 * Converts an NGGFeatureVector into a weka DenseInstance laid out for {@code data}.
 * Slots 0-3 carry containment/size/value/NVS similarities for index 0, slots 4-7
 * the same four for index 1, and slot 8 the class label encoded as its index
 * within nominal attribute 8.
 *
 * @param vSource source n-gram-graph feature vector
 * @param data    dataset whose attribute layout (and nominal label values) is used
 * @return a DenseInstance with weight 1.0, not yet attached to {@code data}
 */
public Instance fillFeatureVector(NGGFeatureVector vSource, Instances data) {
    double[] attValues = new double[data.numAttributes()];
    int pos = 0;
    // Four similarity measures per comparison index, in a fixed order.
    for (int idx = 0; idx <= 1; idx++) {
        attValues[pos++] = vSource.getContainmentSimilarityArrayAtIndex(idx);
        attValues[pos++] = vSource.getSizeSimilarityArrayAtIndex(idx);
        attValues[pos++] = vSource.getValueSimilarityArrayAtIndex(idx);
        attValues[pos++] = vSource.getNVSArrayAtIndex(idx);
    }
    // Nominal class value is stored as the label's index in attribute 8's value list.
    attValues[8] = data.attribute(8).indexOfValue(vSource.getLabel());
    return new DenseInstance(1.0, attValues);
}
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/** * /*from ww w. j a v a 2 s. co m*/ * Increments fp and fn by specified percentages. * Randomize order of instances and modifies instances until noise quota is reached. * Than randomized instances again. * NOTE: It modifies the given dataset, because it is a reference. * * @param origDataset * @param fpPercentage * @param fnPercentage * @return Instances noisyDataset */ public Instances addNoiseToDataset(Instances origDataset, BigDecimal fpPercentage, BigDecimal fnPercentage) { // exits if no noise must be added if (fnPercentage.equals(BigDecimal.ZERO) && fpPercentage.equals(BigDecimal.ZERO)) { if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] nessun errore da aggiungere"); return origDataset; } // total instances in dataset int numInstances = origDataset.numInstances(); // finds positive (buggy) and negative (non-buggy) instances numbers int numOfPositives = 0; int numOfNegatives = 0; for (int j = 0; j < numInstances; j++) { if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) { numOfPositives++; } // this is a redundant control, but better safe than sorry else if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) { numOfNegatives++; } } // calculates the number of false positives to insert int fpToInsert = (int) Math.round(numOfNegatives * fpPercentage.doubleValue() / 100); int fpInserted = 0; if (verbose) System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpToInsert= " + fpToInsert + ", totIntances= " + origDataset.numInstances() + " true negatives= " + numOfNegatives + " %fp= " + fpPercentage); // calculates the number of false negatives to insert int fnToInsert = (int) Math.round(numOfPositives * fnPercentage.doubleValue() / 100); int fnInserted = 0; if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] fnToInsert= " + fnToInsert + ", totIntances= " + origDataset.numInstances() + " true positives= " + numOfPositives + " %fn= " + 
fnPercentage); if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel + " - nonbuggy label: " + Settings.nonbuggyLabel); // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); for (int i = 0; i < origDataset.numInstances(); i++) { if (verbose) System.out.print("\nORIGINAL VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); // gets the classification attribute (it HAS to be the last) Attribute att = origDataset.instance(i).attribute(origDataset.classIndex()); // if there are fn to add and this is a positive instances it turns it into a negative, making it a fn if ((fnInserted < fnToInsert) && (origDataset.instance(i).stringValue(origDataset.classIndex()) .equals(Settings.buggyLabel))) { origDataset.instance(i).setValue(att, Settings.nonbuggyLabel); fnInserted++; if (verbose) System.out.print(" - added FN, added " + fnInserted + " of " + fnToInsert + " "); } // if there are fp to add and this is a negative instances it turns it into a positive, making it a fp else if ((fpInserted < fpToInsert) && (origDataset.instance(i).stringValue(origDataset.classIndex()) .equals(Settings.nonbuggyLabel))) { origDataset.instance(i).setValue(att, Settings.buggyLabel); fpInserted++; if (verbose) System.out.print(" - added FP, added " + fpInserted + " of " + fpToInsert + " "); } if (verbose) System.out.print(" FINAL ELEMENT VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); } // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); return origDataset; }
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/** * Increments fp and fn in combination by a specified percentages. * Randomize order of instances and modifies instances until noise quota is reached. * Than randomized instances again./* w w w. j ava 2 s . co m*/ * NOTE: It modifies the given dataset, because it is a reference. * * @param origDataset * @param combinedFpFnPercentage * @return noisydata */ public Instances addNoiseToDataset(Instances origDataset, BigDecimal combinedFpFnPercentage) { // exits if no noise must be added if (combinedFpFnPercentage.equals(BigDecimal.ZERO)) { if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] nessun errore da aggiungere"); return origDataset; } // total instances in dataset int numInstances = origDataset.numInstances(); // finds positive (buggy) and negative (non-buggy) instances numbers int fpAndFnToInsert = (int) Math.round(numInstances * combinedFpFnPercentage.doubleValue() / 100); int fpAndFnInserted = 0; if (verbose) System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpAndFnToInsert= " + fpAndFnToInsert + ", totIntances= " + origDataset.numInstances()); if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel + " - nonbuggy label: " + Settings.nonbuggyLabel); // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); for (int i = 0; i < origDataset.numInstances(); i++) { if (verbose) System.out.print("\nORIGINAL VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); // gets the classification attribute (it HAS to be the last) Attribute att = origDataset.instance(i).attribute(origDataset.classIndex()); // if there are fn or fp to add if (fpAndFnInserted < fpAndFnToInsert) { // if this is a positive instances it turns it into a negative, making it a fn if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) { if 
(verbose) System.out.print(" - added FN, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " "); origDataset.instance(i).setValue(att, Settings.nonbuggyLabel); fpAndFnInserted++; } // if this is a negative instances it turns it into a positive, making it a fp else if (origDataset.instance(i).stringValue(origDataset.classIndex()) .equals(Settings.nonbuggyLabel)) { if (verbose) System.out.print(" - added FP, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " "); origDataset.instance(i).setValue(att, Settings.buggyLabel); fpAndFnInserted++; } } if (verbose) System.out.print(" FINAL ELEMENT VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); } // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); return origDataset; }
From source file:epsi.i5.datamining.Weka.java
public void excutionAlgo() throws FileNotFoundException, IOException, Exception { BufferedReader reader = new BufferedReader(new FileReader("src/epsi/i5/data/" + fileOne + ".arff")); Instances data = new Instances(reader); reader.close();/* w w w . ja v a2 s . c o m*/ //System.out.println(data.attribute(0)); data.setClass(data.attribute(0)); NaiveBayes NB = new NaiveBayes(); NB.buildClassifier(data); Evaluation naiveBayes = new Evaluation(data); naiveBayes.crossValidateModel(NB, data, 10, new Random(1)); naiveBayes.evaluateModel(NB, data); //System.out.println(test.confusionMatrix() + "1"); //System.out.println(test.correct() + "2"); System.out.println("*****************************"); System.out.println("******** Naive Bayes ********"); System.out.println(naiveBayes.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(naiveBayes.pctCorrect()); System.out.println(""); J48 j = new J48(); j.buildClassifier(data); Evaluation jeval = new Evaluation(data); jeval.crossValidateModel(j, data, 10, new Random(1)); jeval.evaluateModel(j, data); System.out.println("*****************************"); System.out.println("************ J48 ************"); System.out.println(jeval.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(jeval.pctCorrect()); System.out.println(""); DecisionTable DT = new DecisionTable(); DT.buildClassifier(data); Evaluation decisionTable = new Evaluation(data); decisionTable.crossValidateModel(DT, data, 10, new Random(1)); decisionTable.evaluateModel(DT, data); System.out.println("*****************************"); System.out.println("******* DecisionTable *******"); System.out.println(decisionTable.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(decisionTable.pctCorrect()); 
System.out.println(""); OneR OR = new OneR(); OR.buildClassifier(data); Evaluation oneR = new Evaluation(data); oneR.crossValidateModel(OR, data, 10, new Random(1)); oneR.evaluateModel(OR, data); System.out.println("*****************************"); System.out.println("************ OneR ***********"); System.out.println(oneR.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(oneR.pctCorrect()); //Polarit data.setClass(data.attribute(1)); System.out.println(""); M5Rules MR = new M5Rules(); MR.buildClassifier(data); Evaluation m5rules = new Evaluation(data); m5rules.crossValidateModel(MR, data, 10, new Random(1)); m5rules.evaluateModel(MR, data); System.out.println("*****************************"); System.out.println("********** M5Rules **********"); System.out.println(m5rules.correlationCoefficient()); System.out.println(""); LinearRegression LR = new LinearRegression(); LR.buildClassifier(data); Evaluation linearR = new Evaluation(data); linearR.crossValidateModel(LR, data, 10, new Random(1)); linearR.evaluateModel(LR, data); System.out.println("*****************************"); System.out.println("********** linearR **********"); System.out.println(linearR.correlationCoefficient()); }
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.BaggingClassifier.java
License:Apache License
@Override protected Classifier buildClassifier(DataSet training_ds) { logger.debug("Building Bagging classifier."); Classifier model = null;//from ww w. j av a 2 s. com // Get the independent variable index String independent = training_ds.getIndependent(); if (independent == null) throw new WekaWrapperException("Independent variable is not set in dataset."); try { // Read all the instances in the file (ARFF, CSV, XRFF, ...) ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath()); Instances instances = source.getDataSet(); // Set the independent variable (powerWatts). instances.setClassIndex(instances.attribute(independent).index()); // Builds a regression model for the given data. model = new weka.classifiers.meta.Bagging(); // Build Linear Regression model.buildClassifier(instances); } catch (WekaWrapperException e) { logger.error("Error while creating Bagging classifier.", e); throw new WekaWrapperException("Error while creating Bagging classifier."); } catch (Exception e) { logger.error("Error while applying Bagging to data set instances.", e); throw new WekaWrapperException("Error while applying Bagging to data set instances."); } return model; }
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.LinearRegressionClassifier.java
License:Apache License
@Override public Classifier buildClassifier(DataSet training_ds) { logger.debug("Building LinearRegression classifier."); Classifier model;//w w w . ja v a 2 s. c o m // Get the independent variable index String independent = training_ds.getIndependent(); if (independent == null) throw new WekaWrapperException("Independent variable is not set in dataset."); try { // Read all the instances in the file (ARFF, CSV, XRFF, ...) ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath()); Instances instances = source.getDataSet(); // Set the independent variable (powerWatts). instances.setClassIndex(instances.attribute(independent).index()); // Builds a regression model for the given data. model = new weka.classifiers.functions.LinearRegression(); // Build Linear Regression model.buildClassifier(instances); } catch (WekaWrapperException e) { logger.error("Error while creating Linear Regression classifier.", e); throw new WekaWrapperException("Error while creating Linear Regression classifier."); } catch (Exception e) { logger.error("Error while applying Linear Regression to data set instances.", e); throw new WekaWrapperException("Error while applying Linear Regression to data set instances."); } return model; }
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.MultilayerPerceptronClassifier.java
License:Apache License
@Override protected Classifier buildClassifier(DataSet training_ds) { logger.debug("Building MultilayerPerceptron classifier."); MultilayerPerceptron model;/*from w ww.java 2 s. c om*/ // Get the independent variable index String independent = training_ds.getIndependent(); if (independent == null) throw new WekaWrapperException("Independent variable is not set in dataset."); try { // Read all the instances in the file (ARFF, CSV, XRFF, ...) ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath()); Instances instances = source.getDataSet(); // Set the independent variable (powerWatts). instances.setClassIndex(instances.attribute(independent).index()); // Builds a regression model for the given data. model = new weka.classifiers.functions.MultilayerPerceptron(); model.setHiddenLayers("4"); model.setTrainingTime(20); // Build Linear Regression model.buildClassifier(instances); } catch (WekaWrapperException e) { logger.error("Error while creating Linear Regression classifier.", e); throw new WekaWrapperException("Error while creating Linear Regression classifier."); } catch (Exception e) { logger.error("Error while applying Linear Regression to data set instances.", e); throw new WekaWrapperException("Error while applying Linear Regression to data set instances."); } return model; }
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.RepTreeClassifier.java
License:Apache License
@Override public Classifier buildClassifier(DataSet training_ds) { logger.debug("Building RepTree classifier."); Classifier model;//w w w . j av a2 s . c o m // Get the independent variable index String independent = training_ds.getIndependent(); if (independent == null) throw new WekaWrapperException("Independent variable is not set in dataset."); try { // Read all the instances in the file (ARFF, CSV, XRFF, ...) ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath()); Instances instances = source.getDataSet(); // Set the independent variable (powerWatts). instances.setClassIndex(instances.attribute(independent).index()); // Builds a regression model for the given data. model = new weka.classifiers.trees.REPTree(); // Build Linear Regression model.buildClassifier(instances); } catch (WekaWrapperException e) { logger.error("Error while creating Linear Regression classifier.", e); throw new WekaWrapperException("Error while creating Linear Regression classifier."); } catch (Exception e) { logger.error("Error while applying Linear Regression to data set instances.", e); throw new WekaWrapperException("Error while applying Linear Regression to data set instances."); } return model; }