List of usage examples for weka.core Instance classValue
public double classValue();
From source file:j48.Distribution.java
License:Open Source License
/**
 * Subtracts the given instance's weight from the given bag.
 *
 * Updates the per-bag/per-class matrix, the bag total, the class total,
 * and the grand total by the instance's weight.
 *
 * @param bagIndex index of the bag to subtract from
 * @param instance the instance whose class and weight are removed
 * @exception Exception if something goes wrong
 */
public final void sub(int bagIndex, Instance instance) throws Exception {

    final int classIndex = (int) instance.classValue();
    final double weight = instance.weight();

    m_perClassPerBag[bagIndex][classIndex] -= weight;
    m_perBag[bagIndex] -= weight;
    m_perClass[classIndex] -= weight;
    totaL -= weight;
}
From source file:j48.Distribution.java
License:Open Source License
/**
 * Adds all instances with unknown (missing) values for the given attribute,
 * distributing each one's weight across the bags in proportion to the
 * current bag weights (uniformly if the distribution is still empty).
 *
 * @param source the instances to scan for missing values
 * @param attIndex the attribute whose missing values select instances
 * @exception Exception if something goes wrong
 */
public final void addInstWithUnknown(Instances source, int attIndex) throws Exception {

    final int numBags = m_perBag.length;

    // Bag proportions: uniform when nothing has been counted yet.
    double[] probs = new double[numBags];
    for (int bag = 0; bag < numBags; bag++) {
        probs[bag] = Utils.eq(totaL, 0) ? 1.0 / probs.length : m_perBag[bag] / totaL;
    }

    Enumeration enu = source.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        if (!instance.isMissing(attIndex)) {
            continue;
        }
        int classIndex = (int) instance.classValue();
        double weight = instance.weight();
        m_perClass[classIndex] += weight;
        totaL += weight;
        // Spread the weight over all bags according to the proportions.
        for (int bag = 0; bag < numBags; bag++) {
            double newWeight = probs[bag] * weight;
            m_perClassPerBag[bag][classIndex] += newWeight;
            m_perBag[bag] += newWeight;
        }
    }
}
From source file:j48.Distribution.java
License:Open Source License
/**
 * Adds all instances in the index range [startIndex, lastPlusOne) to the
 * given bag, accumulating their weights into the per-class counts and the
 * bag/grand totals.
 *
 * @param bagIndex index of the bag to add to
 * @param source the instances to read from
 * @param startIndex first index (inclusive)
 * @param lastPlusOne last index (exclusive)
 * @exception Exception if something goes wrong
 */
public final void addRange(int bagIndex, Instances source, int startIndex, int lastPlusOne)
        throws Exception {

    double sumOfWeights = 0;

    for (int i = startIndex; i < lastPlusOne; i++) {
        Instance instance = (Instance) source.instance(i);
        int classIndex = (int) instance.classValue();
        double weight = instance.weight();
        sumOfWeights += weight;
        m_perClassPerBag[bagIndex][classIndex] += weight;
        m_perClass[classIndex] += weight;
    }

    // Bag and grand totals are updated once, from the accumulated sum.
    m_perBag[bagIndex] += sumOfWeights;
    totaL += sumOfWeights;
}
From source file:j48.Distribution.java
License:Open Source License
/** * Adds given instance to all bags weighting it according to given weights. * * @exception Exception if something goes wrong *///from w ww .j a v a2 s . c o m public final void addWeights(Instance instance, double[] weights) throws Exception { int classIndex; int i; classIndex = (int) instance.classValue(); for (i = 0; i < m_perBag.length; i++) { double weight = instance.weight() * weights[i]; m_perClassPerBag[i][classIndex] = m_perClassPerBag[i][classIndex] + weight; m_perBag[i] = m_perBag[i] + weight; m_perClass[classIndex] = m_perClass[classIndex] + weight; totaL = totaL + weight; } }
From source file:j48.Distribution.java
License:Open Source License
/**
 * Deletes the given instance from the given bag by removing its weight
 * from the per-bag/per-class matrix, the bag total, the class total, and
 * the grand total.
 *
 * @param bagIndex index of the bag to delete from
 * @param instance the instance whose class and weight are removed
 * @exception Exception if something goes wrong
 */
public final void del(int bagIndex, Instance instance) throws Exception {

    final int classIndex = (int) instance.classValue();
    final double weight = instance.weight();

    m_perClassPerBag[bagIndex][classIndex] -= weight;
    m_perBag[bagIndex] -= weight;
    m_perClass[classIndex] -= weight;
    totaL -= weight;
}
From source file:j48.Distribution.java
License:Open Source License
/**
 * Deletes all instances in the index range [startIndex, lastPlusOne) from
 * the given bag, removing their weights from the per-class counts and the
 * bag/grand totals.
 *
 * @param bagIndex index of the bag to delete from
 * @param source the instances to read from
 * @param startIndex first index (inclusive)
 * @param lastPlusOne last index (exclusive)
 * @exception Exception if something goes wrong
 */
public final void delRange(int bagIndex, Instances source, int startIndex, int lastPlusOne)
        throws Exception {

    double sumOfWeights = 0;

    for (int i = startIndex; i < lastPlusOne; i++) {
        Instance instance = (Instance) source.instance(i);
        int classIndex = (int) instance.classValue();
        double weight = instance.weight();
        sumOfWeights += weight;
        m_perClassPerBag[bagIndex][classIndex] -= weight;
        m_perClass[classIndex] -= weight;
    }

    // Bag and grand totals are updated once, from the accumulated sum.
    m_perBag[bagIndex] -= sumOfWeights;
    totaL -= sumOfWeights;
}
From source file:j48.Distribution.java
License:Open Source License
/** * Shifts given instance from one bag to another one. * * @exception Exception if something goes wrong *///from w w w . ja va2 s . c om public final void shift(int from, int to, Instance instance) throws Exception { int classIndex; double weight; classIndex = (int) instance.classValue(); weight = instance.weight(); m_perClassPerBag[from][classIndex] -= weight; m_perClassPerBag[to][classIndex] += weight; m_perBag[from] -= weight; m_perBag[to] += weight; }
From source file:j48.Distribution.java
License:Open Source License
/**
 * Shifts all instances in the index range [startIndex, lastPlusOne) from
 * one bag to another. As with {@code shift}, class and grand totals are
 * unchanged because weight only moves between bags.
 *
 * @param from index of the source bag
 * @param to index of the destination bag
 * @param source the instances to read from
 * @param startIndex first index (inclusive)
 * @param lastPlusOne last index (exclusive)
 * @exception Exception if something goes wrong
 */
public final void shiftRange(int from, int to, Instances source, int startIndex, int lastPlusOne)
        throws Exception {

    for (int i = startIndex; i < lastPlusOne; i++) {
        Instance instance = (Instance) source.instance(i);
        int cls = (int) instance.classValue();
        double w = instance.weight();
        m_perClassPerBag[from][cls] -= w;
        m_perClassPerBag[to][cls] += w;
        m_perBag[from] -= w;
        m_perBag[to] += w;
    }
}
From source file:kea.KEAFilter.java
License:Open Source License
/**
 * Converts one document instance into a FastVector of candidate-phrase
 * instances for the output format.
 *
 * For every phrase found in the document, a classifier-format instance is
 * built, scored with m_Classifier, and expanded into an output instance
 * carrying the phrase, its original form, TFxIDF, first-occurrence
 * distance, (optionally) keyphrase frequency, the predicted probability,
 * and a rank slot. Dummy instances (probability -Double.MAX_VALUE) are
 * appended for author-given keyphrases that do not occur in the document.
 * Finally the candidates are stable-sorted by distance, then TFxIDF, then
 * probability, and ranks are assigned — candidates subsumed by an
 * equally-scored superphrase get rank Integer.MAX_VALUE.
 *
 * @param instance the document instance to convert
 * @param training true when called during training (passed to featVals)
 * @return the (sorted, ranked) vector of generated instances
 * @throws Exception if the classifier or filter machinery fails
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    if (m_Debug) {
        System.err.println("-- Converting instance");
    }

    // Get the key phrases for the document (if the keyphrase attribute is set)
    HashMap hashKeyphrases = null;
    HashMap hashKeysEval = null;
    if (!instance.isMissing(m_KeyphrasesAtt)) {
        String keyphrases = instance.stringValue(m_KeyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        hashKeysEval = getGivenKeyphrases(keyphrases, true);
    }

    // Get the phrases for the document
    HashMap hash = new HashMap();
    int length = getPhrases(hash, instance.stringValue(m_DocumentAtt));

    // Compute number of extra attributes (one more when keyphrase
    // frequency is used in debug mode)
    int numFeatures = 5;
    if (m_Debug) {
        if (m_KFused) {
            numFeatures = numFeatures + 1;
        }
    }

    // Set indices of key attributes in the output format
    int phraseAttIndex = m_DocumentAtt;
    int tfidfAttIndex = m_DocumentAtt + 2;
    int distAttIndex = m_DocumentAtt + 3;
    // NOTE(review): probsAttIndex is recomputed inside the loop below from
    // the running position; this initial value is a placeholder.
    int probsAttIndex = m_DocumentAtt + numFeatures - 1;

    // Go through the phrases and convert them into instances
    Iterator it = hash.keySet().iterator();
    while (it.hasNext()) {
        String phrase = (String) it.next();
        FastVector phraseInfo = (FastVector) hash.get(phrase);
        double[] vals = featVals(phrase, phraseInfo, training, hashKeysEval, hashKeyphrases, length);
        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(m_ClassifierData);

        // Get probability of phrase being key phrase
        double[] probs = m_Classifier.distributionForInstance(inst);
        double prob = probs[1];

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures];
        int pos = 0;
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (i == m_DocumentAtt) {
                // Add phrase (registering the string value in the output format)
                int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                newInst[pos++] = index;
                // Add original version
                index = outputFormatPeek().attribute(pos).addStringValue((String) phraseInfo.elementAt(2));
                newInst[pos++] = index;
                // Add TFxIDF
                newInst[pos++] = inst.value(m_TfidfIndex);
                // Add distance
                newInst[pos++] = inst.value(m_FirstOccurIndex);
                // Add other features
                if (m_Debug) {
                    if (m_KFused) {
                        newInst[pos++] = inst.value(m_KeyFreqIndex);
                    }
                }
                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;
                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();
            } else if (i == m_KeyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }
        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
    }

    // Add dummy instances for keyphrases that don't occur
    // in the document
    if (hashKeysEval != null) {
        Iterator phrases = hashKeysEval.keySet().iterator();
        while (phrases.hasNext()) {
            String phrase = (String) phrases.next();
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == m_DocumentAtt) {
                    // Add phrase
                    int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;
                    // Add original version
                    index = outputFormatPeek().attribute(pos).addStringValue((String) hashKeysEval.get(phrase));
                    newInst[pos++] = (double) index;
                    // Add TFxIDF (unknown: phrase is absent from the document)
                    newInst[pos++] = Instance.missingValue();
                    // Add distance
                    newInst[pos++] = Instance.missingValue();
                    // Add other features
                    if (m_Debug) {
                        if (m_KFused) {
                            newInst[pos++] = Instance.missingValue();
                        }
                    }
                    // Add probability and rank: lowest possible probability so
                    // these sort last
                    newInst[pos++] = -Double.MAX_VALUE;
                    newInst[pos++] = Instance.missingValue();
                } else if (i == m_KeyphrasesAtt) {
                    newInst[pos++] = 1; // Keyphrase
                } else {
                    newInst[pos++] = instance.value(i);
                }
            }
            Instance inst = new Instance(instance.weight(), newInst);
            inst.setDataset(outputFormatPeek());
            vector.addElement(inst);
        }
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort, descending
    // via negation)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort, descending
    // via 1 - p)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        String val = currentInstance.stringValue(phraseAttIndex);
        boolean foundSuperphrase = false;
        for (int j = startInd - 1; j >= 0; j--) {
            if (j != i) {
                Instance candidate = (Instance) vector.elementAt(j);
                String potSuperphrase = candidate.stringValue(phraseAttIndex);
                if (val.length() <= potSuperphrase.length()) {
                    if (KEAFilter.contains(val, potSuperphrase)) {
                        foundSuperphrase = true;
                        break;
                    }
                }
            }
        }
        if (foundSuperphrase) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
        } else {
            currentInstance.setValue(probsAttIndex + 1, rank++);
        }
    }
    return vector;
}
From source file:LogReg.Logistic.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * Filters the training data (missing-class removal, missing-value
 * replacement, useless-attribute removal, nominal-to-binary conversion),
 * extracts a weighted data matrix, normalises it to zero mean / unit
 * standard deviation, fits the multinomial logistic model by numeric
 * optimisation (OptEng), and finally converts the coefficients back to
 * the original (non-normalized) attribute units.
 *
 * @param train the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class (copy first so the caller's
    // dataset is untouched)
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's (instance field)
    double[] sY = new double[nK + 1]; // Weighted count of cases per class
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];

        m_Data[i][0] = 1; // intercept column
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                // accumulate weighted sums for mean / stddev
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }

        // Class count
        sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    // intercept column is not normalized
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            // weighted sample standard deviation
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable Avg SD ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    double x[] = new double[(nR + 1) * nK]; // flattened parameter vector
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize: intercepts from the null model, slopes at zero
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        while (x == null) {
            // restart from where the optimizer stopped
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough, but use the current value
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}