List of usage examples for weka.core Instance weight
public double weight();
From source file:fantail.algorithms.AbstractRanker.java
License:Open Source License
public static double[] getAvgRankValues(Instances data) throws Exception { if (data.numInstances() == 0) { throw new Exception("data can't be empty."); }//from w w w. j a va 2 s . c om int numLabels = Tools.getNumberTargets(data); double[] avgVals = new double[numLabels]; for (int m = 0; m < data.numInstances(); m++) { Instance inst = data.instance(m); double[] targetValues = Tools.getTargetVector(inst); for (int j = 0; j < targetValues.length; j++) { avgVals[j] += (targetValues[j] * inst.weight()); } } for (int i = 0; i < avgVals.length; i++) { avgVals[i] /= data.numInstances(); } return avgVals; }
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private double computeVariance(Instances data) throws Exception { double[][] targets = new double[data.numInstances()][]; for (int i = 0; i < data.numInstances(); i++) { targets[i] = Tools.getTargetVector(data.instance(i)); }//from w w w .j a v a 2 s .c o m double sumVar = 0; for (int i = 0; i < m_NumTargetLabels; i++) { double[] target_i = new double[data.numInstances()]; for (int j = 0; j < data.numInstances(); j++) { Instance metaInst = (Instance) data.instance(j); target_i[j] = targets[j][i] * metaInst.weight(); } sumVar += weka.core.Utils.variance(target_i); } return sumVar / m_NumTargetLabels; }
From source file:fantail.algorithms.RankingWithkNN.java
License:Open Source License
@Override public double[] recommendRanking(Instance inst) throws Exception { Instances nnbrs = m_kNN.getNearestNeighbourSearchAlgorithm().kNearestNeighbours(inst, m_K); double[] predictedRanks = new double[Tools.getNumberTargets(inst)]; double sumWeights = 0; for (int k = 0; k < m_K; k++) { Instance nn = (Instance) nnbrs.instance(k); sumWeights += nn.weight(); }// w w w . ja v a 2 s . c o m for (int k = 0; k < m_K; k++) { Instance nn = (Instance) nnbrs.instance(k); double[] rankingNN = Tools.getTargetVector(nn); for (int j = 0; j < predictedRanks.length; j++) { predictedRanks[j] += (rankingNN[j] * nn.weight() / sumWeights); } } return Tools.doubleArrayToRanking(predictedRanks); }
From source file:faster_pca.faster_pca.java
License:Open Source License
/** * Transform an instance in original (unormalized) format. * * @param instance an instance in the original (unormalized) format * @return a transformed instance/*from w w w . java2 s. c o m*/ * @throws Exception if instance can't be transformed */ protected Instance convertInstance(Instance instance) throws Exception { Instance result; double[] newVals; Instance tempInst; double cumulative; int i; int j; double tempval; int numAttsLowerBound; newVals = new double[m_OutputNumAtts]; tempInst = (Instance) instance.copy(); /*m_ReplaceMissingFilter.input(tempInst); m_ReplaceMissingFilter.batchFinished(); tempInst = m_ReplaceMissingFilter.output();*/ m_NominalToBinaryFilter.input(tempInst); m_NominalToBinaryFilter.batchFinished(); tempInst = m_NominalToBinaryFilter.output(); if (m_AttributeFilter != null) { m_AttributeFilter.input(tempInst); m_AttributeFilter.batchFinished(); tempInst = m_AttributeFilter.output(); } if (!super.getCenterData()) { tempInst = f_norm.filter(tempInst); } else { tempInst = f_center.filter(tempInst); } if (m_HasClass) { newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex()); } if (m_MaxAttributes > 0) { numAttsLowerBound = m_NumAttribs - m_MaxAttributes; } else { numAttsLowerBound = 0; } if (numAttsLowerBound < 0) { numAttsLowerBound = 0; } double tempInstCpy[] = new double[m_NumAttribs]; for (j = 0; j < m_NumAttribs; j++) { tempInstCpy[j] = tempInst.value(j); } cumulative = 0; for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) { tempval = 0.0; for (j = 0; j < m_NumAttribs; j++) { tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInstCpy[j]; } newVals[m_NumAttribs - i - 1] = tempval; cumulative += m_Eigenvalues[m_SortedEigens[i]]; if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance) { break; } } // create instance if (instance instanceof SparseInstance) { result = new SparseInstance(instance.weight(), newVals); } else { result = new DenseInstance(instance.weight(), newVals); } return result; }
From source file:feature.InfoGainEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all * attributes that are numeric.// w w w . j a v a 2s . c o m * * @param data * set of instances serving as training data * @throws Exception * if the evaluator has not been generated successfully */ public double computeInfoGain(Instances data, int att) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute info gains m_InfoGains = new double[data.numAttributes()]; m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att]) - ContingencyTables.entropyConditionedOnRows(counts[att])); return m_InfoGains[att]; }
From source file:feature.InfoGainEval.java
License:Open Source License
public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else {// ww w. j ava2 s . c o m NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute info gains m_InfoGains = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i]) - ContingencyTables.entropyConditionedOnRows(counts[i])); } } }
From source file:ffnn.MultilayerPerceptron.java
License:Open Source License
/** * This function sets what the m_numeric flag to represent the passed class it * also performs the normalization of the attributes if applicable and sets up * the info to normalize the class. (note that regardless of the options it * will fill an array with the range and base, set to normalize all attributes * and the class to be between -1 and 1) * /*from www . java 2 s . c om*/ * @param inst the instances. * @return The modified instances. This needs to be done. If the attributes * are normalized then deep copies will be made of all the instances * which will need to be passed back out. */ private Instances setClassType(Instances inst) throws Exception { if (inst != null) { // x bounds m_attributeRanges = new double[inst.numAttributes()]; m_attributeBases = new double[inst.numAttributes()]; for (int noa = 0; noa < inst.numAttributes(); noa++) { double min = Double.POSITIVE_INFINITY; double max = Double.NEGATIVE_INFINITY; for (int i = 0; i < inst.numInstances(); i++) { if (!inst.instance(i).isMissing(noa)) { double value = inst.instance(i).value(noa); if (value < min) { min = value; } if (value > max) { max = value; } } } m_attributeRanges[noa] = (max - min) / 2; m_attributeBases[noa] = (max + min) / 2; } if (m_normalizeAttributes) { for (int i = 0; i < inst.numInstances(); i++) { Instance currentInstance = inst.instance(i); double[] instance = new double[inst.numAttributes()]; for (int noa = 0; noa < inst.numAttributes(); noa++) { if (noa != inst.classIndex()) { if (m_attributeRanges[noa] != 0) { instance[noa] = (currentInstance.value(noa) - m_attributeBases[noa]) / m_attributeRanges[noa]; } else { instance[noa] = currentInstance.value(noa) - m_attributeBases[noa]; } } else { instance[noa] = currentInstance.value(noa); } } inst.set(i, new DenseInstance(currentInstance.weight(), instance)); } } if (inst.classAttribute().isNumeric()) { m_numeric = true; } else { m_numeric = false; } } return inst; }
From source file:filters.MauiFilter.java
License:Open Source License
/** * Builds the classifier./*from www . java 2s. co m*/ */ private void buildClassifier() throws Exception { // Generate input format for classifier FastVector atts = new FastVector(); for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (i == documentAtt) { atts.addElement(new Attribute("Term_frequency")); // 2 atts.addElement(new Attribute("IDF")); // atts.addElement(new Attribute("TFxIDF")); // atts.addElement(new Attribute("First_occurrence")); // atts.addElement(new Attribute("Last_occurrence")); // atts.addElement(new Attribute("Spread")); // atts.addElement(new Attribute("Domain_keyphraseness")); // atts.addElement(new Attribute("Length")); // atts.addElement(new Attribute("Generality")); // atts.addElement(new Attribute("Node_degree")); // atts.addElement(new Attribute("Semantic_relatedness")); // atts.addElement(new Attribute("Wikipedia_keyphraseness")); // atts.addElement(new Attribute("Inverse_Wikip_frequency")); // atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 13 } else if (i == keyphrasesAtt) { if (nominalClassValue) { FastVector vals = new FastVector(2); vals.addElement("False"); vals.addElement("True"); atts.addElement(new Attribute("Keyphrase?", vals)); } else { atts.addElement(new Attribute("Keyphrase?")); } } } classifierData = new Instances("ClassifierData", atts, 0); classifierData.setClassIndex(numFeatures); if (debugMode) { System.err.println("--- Converting instances for classifier"); } int totalDocuments = getInputFormat().numInstances(); // Convert pending input instances into data for classifier for (int i = 0; i < totalDocuments; i++) { Instance current = getInputFormat().instance(i); // Get the key phrases for the document String keyphrases = current.stringValue(keyphrasesAtt); HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases); // Get the phrases for the document HashMap<String, Candidate> candidateList = allCandidates.get(current); // Compute the feature values for each phrase and // add the instance to the data for the classifier int countPos = 0; int countNeg = 0; if (debugMode) { System.err .println("--- Computing features for document " + i + " out of " + totalDocuments + "..."); } for (Candidate candidate : candidateList.values()) { // ignore all candidates that appear less than a threshold if (candidate.getFrequency() < minOccurFrequency) { continue; } // compute feature values double[] vals = computeFeatureValues(candidate, true, hashKeyphrases, candidateList); if (vals[vals.length - 1] == 0) { countNeg++; } else { countPos++; } Instance inst = new Instance(current.weight(), vals); // System.out.println(candidate + "\t" + inst); classifierData.add(inst); } if (debugMode) { System.err.println(countPos + " positive; " + countNeg + " negative instances"); } } if (debugMode) { System.err.println("--- Building classifier"); } if (classifier == null) { // Build classifier if (nominalClassValue) { // FilteredClassifier fclass = new FilteredClassifier(); // fclass.setClassifier(new NaiveBayesSimple()); // fclass.setFilter(new Discretize()); // classifier = fclass; classifier = new Bagging(); // try also // classifier.setOptions( Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2")); } else { classifier = new Bagging(); // try also // classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W // weka.classifiers.trees.J48 -- -U -M 2")) ; String optionsString = "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0"; String[] options = Utils.splitOptions(optionsString); classifier.setOptions(options); } } FileOutputStream out = new FileOutputStream(new File("19docs.arff")); PrintWriter printer = new PrintWriter(out); printer.write(classifierData.toString()); printer.close(); out.close(); classifier.buildClassifier(classifierData); if (debugMode) { System.err.println(classifier); } // Save space classifierData = new Instances(classifierData, 0); }
From source file:filters.MauiFilter.java
License:Open Source License
/** * Converts an instance./*from w ww . j a va 2s. com*/ */ private FastVector convertInstance(Instance instance, boolean training) throws Exception { FastVector vector = new FastVector(); String fileName = instance.stringValue(fileNameAtt); if (debugMode) { System.err.println("-- Converting instance for document " + fileName); } // Get the key phrases for the document HashMap<String, Counter> hashKeyphrases = null; if (!instance.isMissing(keyphrasesAtt)) { String keyphrases = instance.stringValue(keyphrasesAtt); hashKeyphrases = getGivenKeyphrases(keyphrases); } // Get the document text String documentText = instance.stringValue(documentAtt); // Compute the candidate topics HashMap<String, Candidate> candidateList; if (allCandidates != null && allCandidates.containsKey(instance)) { candidateList = allCandidates.get(instance); } else { candidateList = getCandidates(documentText); } if (debugMode) { System.err.println(candidateList.size() + " candidates "); } // Set indices for key attributes int tfidfAttIndex = documentAtt + 2; int distAttIndex = documentAtt + 3; int probsAttIndex = documentAtt + numFeatures; int countPos = 0; int countNeg = 0; // Go through the phrases and convert them into instances for (Candidate candidate : candidateList.values()) { if (candidate.getFrequency() < minOccurFrequency) { continue; } String name = candidate.getName(); String orig = candidate.getBestFullForm(); if (!vocabularyName.equals("none")) { orig = candidate.getTitle(); } double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList); Instance inst = new Instance(instance.weight(), vals); inst.setDataset(classifierData); // Get probability of a phrase being key phrase double[] probs = classifier.distributionForInstance(inst); double prob = probs[0]; if (nominalClassValue) { prob = probs[1]; } // Compute attribute values for final instance double[] newInst = new double[instance.numAttributes() + numFeatures + 2]; int pos = 0; for (int i = 1; i < instance.numAttributes(); i++) { if (i == documentAtt) { // output of values for a given phrase: // Add phrase int index = outputFormatPeek().attribute(pos).addStringValue(name); newInst[pos++] = index; // Add original version if (orig != null) { index = outputFormatPeek().attribute(pos).addStringValue(orig); } else { index = outputFormatPeek().attribute(pos).addStringValue(name); } newInst[pos++] = index; // Add features newInst[pos++] = inst.value(tfIndex); newInst[pos++] = inst.value(idfIndex); newInst[pos++] = inst.value(tfidfIndex); newInst[pos++] = inst.value(firstOccurIndex); newInst[pos++] = inst.value(lastOccurIndex); newInst[pos++] = inst.value(spreadOccurIndex); newInst[pos++] = inst.value(domainKeyphIndex); newInst[pos++] = inst.value(lengthIndex); newInst[pos++] = inst.value(generalityIndex); newInst[pos++] = inst.value(nodeDegreeIndex); newInst[pos++] = inst.value(semRelIndex); newInst[pos++] = inst.value(wikipKeyphrIndex); newInst[pos++] = inst.value(invWikipFreqIndex); newInst[pos++] = inst.value(totalWikipKeyphrIndex); // Add probability probsAttIndex = pos; newInst[pos++] = prob; // Set rank to missing (computed below) newInst[pos++] = Instance.missingValue(); } else if (i == keyphrasesAtt) { newInst[pos++] = inst.classValue(); } else { newInst[pos++] = instance.value(i); } } Instance ins = new Instance(instance.weight(), newInst); ins.setDataset(outputFormatPeek()); vector.addElement(ins); if (inst.classValue() == 0) { countNeg++; } else { countPos++; } } if (debugMode) { System.err.println(countPos + " positive; " + countNeg + " negative instances"); } // Sort phrases according to their distance (stable sort) double[] vals = new double[vector.size()]; for (int i = 0; i < vals.length; i++) { vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex); } FastVector newVector = new FastVector(vector.size()); int[] sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Sort phrases according to their tfxidf value (stable sort) for (int i = 0; i < vals.length; i++) { vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex); } newVector = new FastVector(vector.size()); sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Sort phrases according to their probability (stable sort) for (int i = 0; i < vals.length; i++) { vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex); } newVector = new FastVector(vector.size()); sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Compute rank of phrases. Check for subphrases that are ranked // lower than superphrases and assign probability -1 and set the // rank to Integer.MAX_VALUE int rank = 1; for (int i = 0; i < vals.length; i++) { Instance currentInstance = (Instance) vector.elementAt(i); // Short cut: if phrase very unlikely make rank very low and // continue if (Utils.grOrEq(vals[i], 1.0)) { currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE); continue; } // Otherwise look for super phrase starting with first phrase // in list that has same probability, TFxIDF value, and distance as // current phrase. We do this to catch all superphrases // that have same probability, TFxIDF value and distance as current // phrase. int startInd = i; while (startInd < vals.length) { Instance inst = (Instance) vector.elementAt(startInd); if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex)) || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex)) || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) { break; } startInd++; } currentInstance.setValue(probsAttIndex + 1, rank++); } return vector; }
From source file:GClass.EvaluationInternal.java
License:Open Source License
/** * Sets the class prior probabilities// ww w. j a v a 2s . c o m * * @param train the training instances used to determine * the prior probabilities * @exception Exception if the class attribute of the instances is not * set */ public void setPriors(Instances train) throws Exception { if (!m_ClassIsNominal) { m_NumTrainClassVals = 0; m_TrainClassVals = null; m_TrainClassWeights = null; m_PriorErrorEstimator = null; m_ErrorEstimator = null; for (int i = 0; i < train.numInstances(); i++) { Instance currentInst = train.instance(i); if (!currentInst.classIsMissing()) { addNumericTrainClass(currentInst.classValue(), currentInst.weight()); } } } else { for (int i = 0; i < m_NumClasses; i++) { m_ClassPriors[i] = 1; } m_ClassPriorsSum = m_NumClasses; for (int i = 0; i < train.numInstances(); i++) { if (!train.instance(i).classIsMissing()) { m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight(); m_ClassPriorsSum += train.instance(i).weight(); } } } }