List of usage examples for the weka.core.Instance method stringValue
public String stringValue(Attribute att);
From source file:moa.streams.filters.ReplacingMissingValuesFilter.java
License:Open Source License
@Override public Instance nextInstance() { Instance inst = (Instance) this.inputStream.nextInstance().copy(); // Initialization if (numAttributes < 0) { numAttributes = inst.numAttributes(); columnsStatistics = new double[numAttributes]; numberOfSamples = new long[numAttributes]; lastNominalValues = new String[numAttributes]; frequencies = new HashMap[numAttributes]; for (int i = 0; i < inst.numAttributes(); i++) { if (inst.attribute(i).isNominal()) frequencies[i] = new HashMap<String, Integer>(); }//from w w w .ja v a2 s .c om numericalSelectedStrategy = this.numericReplacementStrategyOption.getChosenIndex(); nominalSelectedStrategy = this.nominalReplacementStrategyOption.getChosenIndex(); } for (int i = 0; i < numAttributes; i++) { // ---- Numerical values ---- if (inst.attribute(i).isNumeric()) { // Handle missing value if (inst.isMissing(i)) { switch (numericalSelectedStrategy) { case 0: // NOTHING break; case 1: // LAST KNOWN VALUE case 2: // MEAN case 3: // MAX case 4: // MIN inst.setValue(i, columnsStatistics[i]); break; case 5: // CONSTANT inst.setValue(i, numericalConstantValueOption.getValue()); break; default: continue; } } // Update statistics with non-missing values else { switch (numericalSelectedStrategy) { case 1: // LAST KNOWN VALUE columnsStatistics[i] = inst.value(i); break; case 2: // MEAN numberOfSamples[i]++; columnsStatistics[i] = columnsStatistics[i] + (inst.value(i) - columnsStatistics[i]) / numberOfSamples[i]; break; case 3: // MAX columnsStatistics[i] = columnsStatistics[i] < inst.value(i) ? inst.value(i) : columnsStatistics[i]; break; case 4: // MIN columnsStatistics[i] = columnsStatistics[i] > inst.value(i) ? 
inst.value(i) : columnsStatistics[i]; break; default: continue; } } } // ---- Nominal values ---- else if (inst.attribute(i).isNominal()) { // Handle missing value if (inst.isMissing(i)) { switch (nominalSelectedStrategy) { case 0: // NOTHING break; case 1: // LAST KNOWN VALUE if (lastNominalValues[i] != null) { inst.setValue(i, lastNominalValues[i]); } break; case 2: // MODE if (!frequencies[i].isEmpty()) { // Sort the map to get the most frequent value Map<String, Integer> sortedMap = MapUtil.sortByValue(frequencies[i]); inst.setValue(i, sortedMap.entrySet().iterator().next().getKey()); } break; default: continue; } } // Update statistics with non-missing values else { switch (nominalSelectedStrategy) { case 1: // LAST KNOWN VALUE lastNominalValues[i] = inst.stringValue(i); break; case 2: // MODE Integer previousCounter = frequencies[i].get(inst.stringValue(i)); if (previousCounter == null) previousCounter = 0; frequencies[i].put(inst.stringValue(i), ++previousCounter); break; default: continue; } } } } return inst; }
From source file:mulan.classifier.neural.DataPair.java
License:Open Source License
/** * Creates a {@link DataPair} representation for each {@link Instance} contained in * {@link MultiLabelInstances} data set. The {@link DataPair} is a light weight representation * of instance values (by double values), which is useful when iteration over the data and its * values./* www. j a v a2 s . c o m*/ * * @param mlDataSet the {@link MultiLabelInstances} which content has to be * converted to list of {@link DataPair} * @param bipolarOutput indicates whether output values should be converted * to bipolar values, or left intact as binary * @return the list of data pairs */ // TODO: this method should be in some kind of "data utils". public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) { Instances data = mlDataSet.getDataSet(); int[] featureIndices = mlDataSet.getFeatureIndices(); int[] labelIndices = mlDataSet.getLabelIndices(); int numFeatures = featureIndices.length; int numLabels = mlDataSet.getNumLabels(); int numInstances = data.numInstances(); List<DataPair> dataPairs = new ArrayList<DataPair>(numInstances); for (int index = 0; index < numInstances; index++) { Instance instance = data.instance(index); double[] input = new double[numFeatures]; for (int i = 0; i < numFeatures; i++) { int featureIndex = featureIndices[i]; Attribute featureAttr = instance.attribute(featureIndex); // if attribute is binary, parse the string value ... it is expected to be '0' or '1' if (featureAttr.isNominal() && featureAttr.numValues() == 2) { input[i] = Double.parseDouble(instance.stringValue(featureIndex)); } // else : // a) the attribute is nominal with multiple values, use indexes as nominal values // do not have to be numbers in general ... this is fall-back ... 
should be rare case // b) is numeric attribute else { input[i] = instance.value(featureIndex); } } if (mlDataSet.hasMissingLabels(instance)) continue; double[] output = new double[numLabels]; for (int i = 0; i < numLabels; i++) { output[i] = Double .parseDouble(data.attribute(labelIndices[i]).value((int) instance.value(labelIndices[i]))); if (bipolarOutput && output[i] == 0) { output[i] = -1; } } dataPairs.add(new DataPair(input, output)); } return dataPairs; }
From source file:mulan.data.MultiLabelInstances.java
License:Open Source License
/**
 * Tells whether the named label has the string value "1" on the instance.
 *
 * @param instance the instance to inspect
 * @param labelName the name of the label, used as key into {@code attributesIndex}
 * @param attributesIndex lookup from label name to its {@link Attribute}
 * @return {@code true} if the label's string value equals "1", {@code false} otherwise
 */
private boolean isLabelSet(Instance instance, String labelName, Map<String, Attribute> attributesIndex) {
    Attribute labelAttribute = attributesIndex.get(labelName);
    String labelValue = instance.stringValue(labelAttribute);
    return labelValue.equals("1");
}
From source file:mulan.data.Statistics.java
License:Open Source License
/**
 * Calculates and prints a matrix with the co-occurrences of pairs of labels.
 *
 * <p>Only the upper triangle ({@code i < j}) is filled: entry {@code [i][j]}
 * counts the instances in which both the i-th and the j-th label have the
 * string value "1". The matrix is written to {@code System.out}, one
 * tab-separated row per line. As a side effect the {@code numPredictors}
 * field is set to the number of non-label attributes.
 *
 * @param mdata a multi-label data set
 * @return a matrix of co-occurrences
 */
public double[][] calculateCoocurrence(MultiLabelInstances mdata) {
    Instances data = mdata.getDataSet();
    int labels = mdata.getNumLabels();
    // Address labels through their real attribute indices instead of assuming
    // that they are the last attributes of the data set.
    int[] labelIndices = mdata.getLabelIndices();
    double[][] coocurrenceMatrix = new double[labels][labels];

    numPredictors = data.numAttributes() - labels;

    for (int k = 0; k < data.numInstances(); k++) {
        Instance temp = data.instance(k);
        for (int i = 0; i < labels; i++) {
            if (!temp.stringValue(labelIndices[i]).equals("1")) {
                continue;
            }
            for (int j = i + 1; j < labels; j++) {
                if (temp.stringValue(labelIndices[j]).equals("1")) {
                    coocurrenceMatrix[i][j]++;
                }
            }
        }
    }

    for (int i = 0; i < labels; i++) {
        for (int j = 0; j < labels; j++) {
            System.out.print(coocurrenceMatrix[i][j] + "\t");
        }
        System.out.println();
    }

    return coocurrenceMatrix;
}
From source file:naivebayes.NBTubesAI.java
@Override public void buildClassifier(Instances data) throws Exception { distribution = new HashMap<>(); classCount = new HashMap<>(); data = new Instances(data); //Delete data tanpa kelas data.deleteWithMissingClass();/*www . j av a 2 s . co m*/ //melakukan filter discretize untuk mengubah atribut menjadi nominal //menghitung jumlah instance m_Instances = new Instances(data); numInstance = data.numInstances(); //Enumerasi seluruh atribut instances Enumeration<Attribute> enumAttr = m_Instances.enumerateAttributes(); //Index attribut saat ini int attrIndex = 0; //Hashmap untuk menghitung jumlah kemunculan kelas yang bersesuaian for (int i = 0; i < m_Instances.classAttribute().numValues(); i++) { classCount.put(i + 0.0, 0); } Enumeration<Instance> forCount = m_Instances.enumerateInstances(); while (forCount.hasMoreElements()) { Instance instCount = forCount.nextElement(); classCount.put(instCount.classValue(), classCount.get(instCount.classValue()) + 1); } System.out.println("JMLAH KELAS:" + m_Instances.numClasses()); System.out.println(classCount.toString()); //Looping untuk seluruh atribut while (enumAttr.hasMoreElements()) { Attribute temp = enumAttr.nextElement(); //nama attribute String attrName = temp.name(); //Memasukkan kunci attrName if (distribution.get(attrName) == null) { distribution.put(attrName, new HashMap<String, HashMap<Double, Double>>()); } //Enumerasi dari seluruh instance pada Instances masukan Enumeration<Instance> enumInst = m_Instances.enumerateInstances(); //Looping untuk seluruh instance while (enumInst.hasMoreElements()) { //Mengambil Instance selanjutnya Instance tempInst = enumInst.nextElement(); //Nilai domain untuk atribut saat ini String nilaiDomain = tempInst.stringValue(temp); //Class dari instance ini double classAttr = tempInst.classValue(); if (distribution.get(attrName).get(nilaiDomain) == null) { //Membuat hashmap baru jika domainNilai pertama kali muncul distribution.get(attrName).put(nilaiDomain, new HashMap<Double, 
Double>()); } if (distribution.get(attrName).get(nilaiDomain).get(classAttr) == null) { //Membuat hashmap baru jika untuk pasangan domain nilai dan //kelas ini baru pertama kali muncul for (int i = 0; i < m_Instances.numClasses(); i++) { distribution.get(attrName).get(nilaiDomain).put(i + 0.0, 0.0); } } //Menambahkan frekuensi kemunculan +1 distribution.get(attrName).get(nilaiDomain).put(classAttr, distribution.get(attrName).get(nilaiDomain).get(classAttr) + (1.0 / classCount.get(classAttr))); } attrIndex++; } System.out.println(distribution.toString()); System.out.println(classCount.toString()); }
From source file:naivebayes.NBTubesAI.java
@Override public double classifyInstance(Instance instance) throws Exception { int jumlahKelas = instance.classAttribute().numValues(); double[] classifyResult = new double[jumlahKelas]; //iterasi menghitung probabilitas untuk seluruh kelas for (int i = 0; i < jumlahKelas; i++) { //Rumus probabilitas Naive Bayes here classifyResult[i] = (double) classCount.get(i + 0.0) / numInstance; Enumeration<Attribute> enumAttr = instance.enumerateAttributes(); while (enumAttr.hasMoreElements()) { Attribute temp = enumAttr.nextElement(); if (!instance.isMissing(temp)) { try { classifyResult[i] = classifyResult[i] * distribution.get(temp.name()).get(instance.stringValue(temp)).get(i + 0.0); } catch (NullPointerException e) { classifyResult[i] = 0; }//w w w.j a v a 2s . co m } } } double maxValue = 0; int currentIndex = 0; for (int i = 0; i < jumlahKelas; i++) { if (maxValue < classifyResult[i]) { currentIndex = i; maxValue = classifyResult[i]; } } return currentIndex; }
From source file:naivebayes.NBTubesAI.java
@Override public double[] distributionForInstance(Instance instance) throws Exception { int jumlahKelas = instance.classAttribute().numValues(); double[] classifyResult = new double[jumlahKelas]; //iterasi menghitung probabilitas untuk seluruh kelas for (int i = 0; i < jumlahKelas; i++) { //Rumus probabilitas Naive Bayes here classifyResult[i] = (double) classCount.get(i + 0.0) / numInstance; Enumeration<Attribute> enumAttr = instance.enumerateAttributes(); while (enumAttr.hasMoreElements()) { Attribute temp = enumAttr.nextElement(); if (!instance.isMissing(temp)) { try { classifyResult[i] = classifyResult[i] * distribution.get(temp.name()).get(instance.stringValue(temp)).get(i + 0.0); } catch (NullPointerException e) { }// ww w. j a va2s .c o m } } } return classifyResult; }
From source file:NaiveBayesPckge.mushClass.java
@Override public double classifyInstance(Instance instance) throws java.lang.Exception { double classify = 0; // banyaknya kesimpulan. Misal T dan F berati ada 2 int numClasses = instance.numClasses(); double[] out = new double[numClasses]; //banyaknya kelas yang diuji int class_index = instance.classIndex(); //banyaknya atribut int num_attributes = instance.numAttributes(); double inputs[] = new double[num_attributes]; for (int i = 0; i < numClasses; i++) { out[i] = probabConclusion[i];//from w w w .jav a2 s. c o m for (int j = 0; j < num_attributes - 1; j++) { int indexLabel = searchIndexLabel(j, instance.stringValue(j)); out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i); } } classify = getIndexBiggestProbability(out); return classify; }
From source file:NaiveBayesPckge.mushClass.java
@Override public double[] distributionForInstance(Instance instance) throws Exception { // banyaknya kesimpulan. Misal T dan F berati ada 2 int numClasses = instance.numClasses(); double[] out = new double[numClasses]; //banyaknya kelas yang diuji int class_index = instance.classIndex(); //banyaknya atribut int num_attributes = instance.numAttributes(); double inputs[] = new double[num_attributes]; for (int i = 0; i < numClasses; i++) { out[i] = probabConclusion[i];//from w ww. j av a2 s . co m // System.out.print("\n" + maxIterasi +". out["+i+"] = "); for (int j = 1; j < num_attributes; j++) { int indexLabel = searchIndexLabel(j, instance.stringValue(j)); out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i); // System.out.print(atribNom[j].getAttribObjectType(indexLabel, i) + "*"); } // System.out.println("\nout["+i+"] = "+out[i]); // System.out.println(instance.toString()); } // maxIterasi++; return out; }
From source file:NaiveBayesPckge.NaiveBayesCode.java
@Override public double[] distributionForInstance(Instance instance) throws Exception { // banyaknya kesimpulan. Misal T dan F berati ada 2 int numClasses = instance.numClasses(); double[] out = new double[numClasses]; //banyaknya kelas yang diuji int class_index = instance.classIndex(); //banyaknya atribut int num_attributes = instance.numAttributes(); double inputs[] = new double[num_attributes]; for (int i = 0; i < numClasses; i++) { out[i] = probabConclusion[i];//from w ww .ja va 2 s .c o m // System.out.print("\n" + maxIterasi +". out["+i+"] = "); for (int j = 0; j < num_attributes - 1; j++) { int indexLabel = searchIndexLabel(j, instance.stringValue(j)); out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i); // System.out.print(atribNom[j].getAttribObjectType(indexLabel, i) + "*"); } // System.out.println("\nout["+i+"] = "+out[i]); // System.out.println(instance.toString()); } // maxIterasi++; return out; }