List of usage examples for weka.core Instance classValue
/**
 * Returns the class value of this instance as a double.
 *
 * <p>In the usage examples that follow, callers either use the value directly
 * (e.g. as the dependent variable of a regression) or truncate it with
 * {@code (int)} and use it as an index into per-class arrays.
 *
 * @return the class value of this instance
 */
public double classValue();
From source file:cezeri.feature.selection.FeatureSelectionInfluence.java
private static double[] getAttributeValues(Instances test) { int n = test.numInstances(); double[] ret = new double[n]; for (int i = 0; i < n; i++) { Instance ins = test.instance(i); ret[i] = ins.classValue(); }// w ww . j av a 2s. c o m return ret; }
From source file:cezeri.utils.FactoryInstance.java
/** * * @param m tm dataset/*from ww w.ja v a2 s . c om*/ * @param val class value deeri val olanlar filtrele * @return */ public static Instances[] getSpecificInstancesBasedOnClassValue(Instances m, String[] cl) { Instances[] ret = new Instances[cl.length]; for (int i = 0; i < ret.length; i++) { ret[i] = FactoryInstance.generateInstances(m.relationName() + "_class=" + cl[i], m.numAttributes()); // ret[i] = m.resampleWithWeights(new Random()); ret[i].delete(); } for (int i = 0; i < m.numInstances(); i++) { Instance ins = m.instance(i); for (int j = 0; j < cl.length; j++) { if (("" + (int) ins.classValue()).equals(cl[j])) { ret[j].add(ins); } } } return ret; }
From source file:cezeri.utils.FactoryInstance.java
public static double[] getClassData(Instances m) { double[] ret = new double[m.numInstances()]; for (int i = 0; i < m.numInstances(); i++) { Instance ins = m.instance(i); ret[i] = ins.classValue(); }//from w ww .j ava 2 s. co m return ret; }
From source file:cezeri.utils.FactoryInstance.java
/**
 * Extracts the class values of those instances whose class value, truncated
 * to {@code int}, equals {@code val}.
 *
 * <p>NOTE(review): {@code (int) Double.NaN} is 0 in Java, so if
 * {@code classValue()} can return NaN (for example for a missing class) such
 * instances are selected by {@code val == 0} and contribute NaN to the
 * result. Confirm whether that is intended.
 *
 * @param m   dataset whose instances are examined in order
 * @param val class value to select
 * @return the (untruncated) class values of the matching instances, in
 *         dataset order, as converted by {@code FactoryUtils.toDoubleArray}
 */
public static double[] getClassData(Instances m, int val) {
    Vector<Double> matches = new Vector<Double>();
    for (int i = 0; i < m.numInstances(); i++) {
        double classValue = m.instance(i).classValue();
        if ((int) classValue == val) {
            matches.add(classValue);
        }
    }
    return FactoryUtils.toDoubleArray(matches);
}
From source file:CGLSMethod.LinearRegression.java
License:Open Source License
/** * Calculate a linear regression using the selected attributes * * @param selectedAttributes an array of booleans where each element * is true if the corresponding attribute should be included in the * regression.// w w w. ja v a 2s . co m * @return an array of coefficients for the linear regression model. * @throws Exception if an error occurred during the regression. */ private double[] doRegression(boolean[] selectedAttributes) throws Exception { if (b_Debug) { System.out.print("doRegression("); for (int i = 0; i < selectedAttributes.length; i++) { System.out.print(" " + selectedAttributes[i]); } System.out.println(" )"); } int numAttributes = 0; for (int i = 0; i < selectedAttributes.length; i++) { if (selectedAttributes[i]) { numAttributes++; } } // Check whether there are still attributes left Matrix independent = null, dependent = null; double[] weights = null; if (numAttributes > 0) { independent = new Matrix(m_TransformedData.numInstances(), numAttributes); dependent = new Matrix(m_TransformedData.numInstances(), 1); for (int i = 0; i < m_TransformedData.numInstances(); i++) { Instance inst = m_TransformedData.instance(i); int column = 0; for (int j = 0; j < m_TransformedData.numAttributes(); j++) { if (j == m_ClassIndex) { dependent.setElement(i, 0, inst.classValue()); } else { if (selectedAttributes[j]) { double value = inst.value(j) - m_Means[j]; // We only need to do this if we want to // scale the input if (!m_checksTurnedOff) { value /= m_StdDevs[j]; } independent.setElement(i, column, value); column++; } } } } // Grab instance weights weights = new double[m_TransformedData.numInstances()]; for (int i = 0; i < weights.length; i++) { weights[i] = m_TransformedData.instance(i).weight(); } } // Compute coefficients (note that we have to treat the // intercept separately so that it doesn't get affected // by the ridge constant.) 
double[] coefficients = new double[numAttributes + 1]; if (numAttributes > 0) { double[] coeffsWithoutIntercept = independent.regression(dependent, weights, m_Ridge); System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes); } coefficients[numAttributes] = m_ClassMean; // Convert coefficients into original scale int column = 0; for (int i = 0; i < m_TransformedData.numAttributes(); i++) { if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) { // We only need to do this if we have scaled the // input. if (!m_checksTurnedOff) { coefficients[column] /= m_StdDevs[i]; } // We have centred the input coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i]; column++; } } return coefficients; }
From source file:ChiSquare.ChiSquaredAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully/*from ww w . ja va2s.c om*/ */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { 
counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; 
for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false); } } }
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/** * determines the dictionary.//www . j a va 2s.c o m */ private void determineDictionary() { if (forcedAttributes == null) { // initialize stopwords Stopwords stopwords = new Stopwords(); if (getUseStoplist()) { try { if (getStopwords().exists() && !getStopwords().isDirectory()) stopwords.read(getStopwords()); } catch (Exception e) { e.printStackTrace(); } } // Operate on a per-class basis if class attribute is set int classInd = getInputFormat().classIndex(); int values = 1; if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) { values = getInputFormat().attribute(classInd).numValues(); } // TreeMap dictionaryArr [] = new TreeMap[values]; TreeMap[] dictionaryArr = new TreeMap[values]; for (int i = 0; i < values; i++) { dictionaryArr[i] = new TreeMap(); } // Make sure we know which fields to convert determineSelectedRange(); // Tokenize all training text into an orderedMap of "words". long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances()); for (int i = 0; i < getInputFormat().numInstances(); i++) { Instance instance = getInputFormat().instance(i); int vInd = 0; if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) { vInd = (int) instance.classValue(); } // Iterate through all relevant string attributes of the current // instance Hashtable h = new Hashtable(); for (int j = 0; j < instance.numAttributes(); j++) { if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) { // Get tokenizer m_Tokenizer.tokenize(instance.stringValue(j)); // Iterate through tokens, perform stemming, and remove // stopwords // (if required) while (m_Tokenizer.hasMoreElements()) { String word = ((String) m_Tokenizer.nextElement()).intern(); if (this.m_lowerCaseTokens == true) word = word.toLowerCase(); word = m_Stemmer.stem(word); if (this.m_useStoplist == true) if (stopwords.is(word)) continue; if (!(h.contains(word))) h.put(word, new Integer(0)); Count count = (Count) dictionaryArr[vInd].get(word); if (count == null) { 
dictionaryArr[vInd].put(word, new Count(1)); } else { count.count++; } } } } // updating the docCount for the words that have occurred in // this // instance(document). Enumeration e = h.keys(); while (e.hasMoreElements()) { String word = (String) e.nextElement(); Count c = (Count) dictionaryArr[vInd].get(word); if (c != null) { c.docCount++; } else System.err.println("Warning: A word should definitely be in the " + "dictionary.Please check the code"); } if (pruneRate > 0) { if (i % pruneRate == 0 && i > 0) { for (int z = 0; z < values; z++) { Vector d = new Vector(1000); Iterator it = dictionaryArr[z].keySet().iterator(); while (it.hasNext()) { String word = (String) it.next(); Count count = (Count) dictionaryArr[z].get(word); if (count.count <= 1) { d.add(word); } } Iterator iter = d.iterator(); while (iter.hasNext()) { String word = (String) iter.next(); dictionaryArr[z].remove(word); } } } } } // Figure out the minimum required word frequency int totalsize = 0; int prune[] = new int[values]; for (int z = 0; z < values; z++) { totalsize += dictionaryArr[z].size(); int array[] = new int[dictionaryArr[z].size()]; int pos = 0; Iterator it = dictionaryArr[z].keySet().iterator(); while (it.hasNext()) { String word = (String) it.next(); Count count = (Count) dictionaryArr[z].get(word); array[pos] = count.count; pos++; } // sort the array sortArray(array); if (array.length < m_WordsToKeep) { // if there aren't enough words, set the threshold to // minFreq prune[z] = m_minTermFreq; } else { // otherwise set it to be at least minFreq prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]); } } // Convert the dictionary into an attribute index // and create one attribute per word FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes()); // Add the non-converted attributes int classIndex = -1; for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (!m_SelectedRange.isInRange(i)) { if (getInputFormat().classIndex() == i) 
{ classIndex = attributes.size(); } attributes.addElement(getInputFormat().attribute(i).copy()); } } // Add the word vector attributes (eliminating duplicates // that occur in multiple classes) TreeMap newDictionary = new TreeMap(); int index = attributes.size(); for (int z = 0; z < values; z++) { Iterator it = dictionaryArr[z].keySet().iterator(); while (it.hasNext()) { String word = (String) it.next(); Count count = (Count) dictionaryArr[z].get(word); if (count.count >= prune[z]) { if (newDictionary.get(word) == null) { newDictionary.put(word, new Integer(index++)); attributes.addElement(new Attribute(m_Prefix + word)); } } } } // Compute document frequencies m_DocsCounts = new int[attributes.size()]; Iterator it = newDictionary.keySet().iterator(); while (it.hasNext()) { String word = (String) it.next(); int idx = ((Integer) newDictionary.get(word)).intValue(); int docsCount = 0; for (int j = 0; j < values; j++) { Count c = (Count) dictionaryArr[j].get(word); if (c != null) docsCount += c.docCount; } m_DocsCounts[idx] = docsCount; } // Trim vector and set instance variables attributes.trimToSize(); m_Dictionary = newDictionary; m_NumInstances = getInputFormat().numInstances(); // Set the filter's output format Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0); outputFormat.setClassIndex(classIndex); setOutputFormat(outputFormat); } else { //m_Dictionary = newDictionary; determineSelectedRange(); m_NumInstances = getInputFormat().numInstances(); TreeMap newDictionary = new TreeMap(); for (int i = 2; i < forcedAttributes.size(); i++) { newDictionary.put(((Attribute) forcedAttributes.get(i)).name(), new Integer(i)); } m_Dictionary = newDictionary; // Set the filter's output format Instances outputFormat = new Instances(getInputFormat().relationName(), forcedAttributes, 0); outputFormat.setClassIndex(1); setOutputFormat(outputFormat); } }
From source file:Classifier.supervised.LinearRegression.java
License:Open Source License
/** * Calculate a linear regression using the selected attributes * * @param selectedAttributes an array of booleans where each element * is true if the corresponding attribute should be included in the * regression.//from w ww .jav a 2s. com * @return an array of coefficients for the linear regression model. * @throws Exception if an error occurred during the regression. */ protected double[] doRegression(boolean[] selectedAttributes) throws Exception { if (m_Debug) { System.out.print("doRegression("); for (int i = 0; i < selectedAttributes.length; i++) { System.out.print(" " + selectedAttributes[i]); } System.out.println(" )"); } int numAttributes = 0; for (int i = 0; i < selectedAttributes.length; i++) { if (selectedAttributes[i]) { numAttributes++; } } // Check whether there are still attributes left Matrix independent = null, dependent = null; if (numAttributes > 0) { independent = new Matrix(m_TransformedData.numInstances(), numAttributes); dependent = new Matrix(m_TransformedData.numInstances(), 1); for (int i = 0; i < m_TransformedData.numInstances(); i++) { Instance inst = m_TransformedData.instance(i); double sqrt_weight = Math.sqrt(inst.weight()); int column = 0; for (int j = 0; j < m_TransformedData.numAttributes(); j++) { if (j == m_ClassIndex) { dependent.set(i, 0, inst.classValue() * sqrt_weight); } else { if (selectedAttributes[j]) { double value = inst.value(j) - m_Means[j]; // We only need to do this if we want to // scale the input if (!m_checksTurnedOff) { value /= m_StdDevs[j]; } independent.set(i, column, value * sqrt_weight); column++; } } } } } // Compute coefficients (note that we have to treat the // intercept separately so that it doesn't get affected // by the ridge constant.) 
double[] coefficients = new double[numAttributes + 1]; if (numAttributes > 0) { double[] coeffsWithoutIntercept = independent.regression(dependent, m_Ridge).getCoefficients(); System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes); } coefficients[numAttributes] = m_ClassMean; // Convert coefficients into original scale int column = 0; for (int i = 0; i < m_TransformedData.numAttributes(); i++) { if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) { // We only need to do this if we have scaled the // input. if (!m_checksTurnedOff) { coefficients[column] /= m_StdDevs[i]; } // We have centred the input coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i]; column++; } } return coefficients; }
From source file:classifiers.ComplexClassifier.java
/**
 * Classifies the given instance and reports whether the predicted class
 * equals the instance's actual class value.
 *
 * <p>For every attribute enumerated from {@code Modelmenge} whose index is
 * below {@code list.size()}, the value of {@code list.get(index).getProbs(in)}
 * is stored for each class. {@code Classparam[i]} is then multiplied by all
 * stored values of class i, and the index chosen by {@code Maxindex} is
 * compared with {@code in.classValue()}.
 *
 * <p>NOTE(review): {@code Classparam} is multiplied in place, so unless it is
 * reset elsewhere the products accumulate across calls. Confirm.
 *
 * <p>NOTE(review): rows of the probability table that are never written
 * (attributes not returned by {@code enumerateAttributes()} or with an index
 * of at least {@code list.size()}) remain 0.0 and therefore zero the product
 * for every class. Confirm this is intended.
 *
 * @param in the instance to classify
 * @return true if the predicted class index equals the instance's class value
 */
@Override
public boolean Classify(Instance in) {
    int numClasses = Modelmenge.numClasses();
    int numAttributes = Modelmenge.numAttributes();
    double[][] probabilities = new double[numAttributes][numClasses];

    Enumeration<Attribute> enu = Modelmenge.enumerateAttributes();
    while (enu.hasMoreElements()) {
        int index = enu.nextElement().index();
        if (index < list.size()) {
            // getProbs is still invoked once per class, as before.
            for (int i = 0; i < numClasses; i++) {
                probabilities[index][i] = list.get(index).getProbs(in);
            }
        }
    }

    for (int i = 0; i < numClasses; i++) {
        for (int j = 0; j < numAttributes; j++) {
            Classparam[i] *= probabilities[j][i];
        }
    }
    return Maxindex(Classparam) == in.classValue();
}
From source file:classifiers.ComplexClassifierZufall.java
@Override public boolean Classify(Instance in) { double[][] probabilities = new double[Modelmenge.numAttributes()][Modelmenge.numClasses()]; Enumeration<Attribute> enu = Modelmenge.enumerateAttributes(); int attindex = 0; while (enu.hasMoreElements()) { Attribute att = enu.nextElement(); for (int i = 0; i < Modelmenge.numClasses(); i++) { if (att.index() < list.size()) { probabilities[att.index()][i] = list.get(att.index()).getProbs(in); }//from w w w . j av a2 s. c om } attindex++; } for (int i = 0; i < Modelmenge.numClasses(); i++) { for (int j = 0; j < Modelmenge.numAttributes(); j++) { Classparam[i] *= probabilities[j][i]; } } return (Maxindex(Classparam) == in.classValue()); }