List of usage examples for weka.core Instance classValue
public double classValue();
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts a set of instances to svm-light format * @param data the weka instances/*from w ww .j a v a 2 s . c om*/ * @return the weka instances in svm-light format */ public static String arffToSVMLight(Instance data, SVMLightLabelFormat labelFormat) { if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) { throw new IllegalArgumentException( "SVM-light classification label format requires that the data contain only two classes."); } String str = ""; String endline = System.getProperty("line.separator"); int numAttributes = data.numAttributes(); int classAttIdx = data.classIndex(); // convert the instance label if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) { str += (data.classValue() == 0) ? "-1" : "1"; } else { str += data.classValue(); } str += " "; // convert each feature for (int attIdx = 0; attIdx < numAttributes; attIdx++) { // skip the class attribute if (attIdx == classAttIdx) continue; str += (attIdx + 1) + ":" + data.value(attIdx) + " "; } // append the instance info string str += "#"; str += endline; return str; }
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/** * Simple function to print the results of a multilabel prediction. * //from w w w . jav a 2s .co m * @param HashMap<Instance, double[]> pred hashmap containing a set of instances and their corresponding * multilabel prediction, as computed by the multiLabelPrediction() function in this class. */ public void printMultilabelPredictions(HashMap<Instance, double[]> pred) { for (Instance i : pred.keySet()) { double[] kk = pred.get(i); int c = 0; System.out.print("instance " + Integer.parseInt(Double.toString(i.value(0))) + " (" + i.classValue() + "|" + i.stringValue(i.classIndex()) + ") --> "); for (double d : kk) { System.out.print("cl_" + c + "=" + d + "; "); c++; } System.out.print("\n"); } }
From source file:etc.aloe.data.ExampleSet.java
License:Open Source License
/** * Gets the actual label of the given example. If the example is unlabeled, * returns null;/*from w ww . j a v a 2 s . c om*/ * * @param i * @return */ public Boolean getTrueLabel(int i) { Instance instance = instances.get(i); return getClassLabel(instance.classValue()); }
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
private int[] determineDictionary(Instances instances) { if (stringAttributeIndex < 0) { throw new IllegalStateException("String attribute index not valid"); }/*from www . j a va 2s . c o m*/ // Operate on a per-class basis if class attribute is set int classInd = instances.classIndex(); int values = 1; if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) { values = instances.attribute(classInd).numValues(); } HashMap<String, Integer> termIndices = new HashMap<String, Integer>(); for (int i = 0; i < termList.size(); i++) { termIndices.put(termList.get(i), i); } //Create the trie for matching terms Trie termTrie = new Trie(termList); //Initialize the dictionary/count map ArrayList<HashMap<Integer, Count>> termCounts = new ArrayList<HashMap<Integer, Count>>(); for (int z = 0; z < values; z++) { termCounts.add(new HashMap<Integer, Count>()); } //Go through all the instances and count the emoticons for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.instance(i); int vInd = 0; if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) { vInd = (int) instance.classValue(); } //Get the string attribute to examine String stringValue = instance.stringValue(stringAttributeIndex); HashMap<Integer, Count> termCountsForClass = termCounts.get(vInd); HashMap<String, Integer> termMatches = termTrie.countNonoverlappingMatches(stringValue); for (Map.Entry<String, Integer> entry : termMatches.entrySet()) { String term = entry.getKey(); int termIdx = termIndices.get(term); int matches = entry.getValue(); Count count = termCountsForClass.get(termIdx); if (count == null) { count = new Count(0); termCountsForClass.put(termIdx, count); } if (matches > 0) { count.docCount += 1; count.count += matches; } } } // Figure out the minimum required word frequency int prune[] = new int[values]; for (int z = 0; z < values; z++) { HashMap<Integer, Count> termCountsForClass = termCounts.get(z); int array[] = new int[termCountsForClass.size()]; int pos = 0; for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) { array[pos] = entry.getValue().count; pos++; } // sort the array sortArray(array); if (array.length < m_WordsToKeep) { // if there aren't enough words, set the threshold to // minFreq prune[z] = m_minTermFreq; } else { // otherwise set it to be at least minFreq prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]); } } // Add the word vector attributes (eliminating duplicates // that occur in multiple classes) HashSet<String> selectedTerms = new HashSet<String>(); for (int z = 0; z < values; z++) { HashMap<Integer, Count> termCountsForClass = termCounts.get(z); for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) { int termIndex = entry.getKey(); String term = termList.get(termIndex); Count count = entry.getValue(); if (count.count >= prune[z]) { selectedTerms.add(term); } } } //Save the selected terms as a list this.m_selectedTerms = new ArrayList<String>(selectedTerms); this.m_selectedTermsTrie = new Trie(this.m_selectedTerms); this.m_NumInstances = instances.size(); //Construct the selected terms to index map this.m_selectedTermIndices = new HashMap<String, Integer>(); for (int i = 0; i < m_selectedTerms.size(); i++) { m_selectedTermIndices.put(m_selectedTerms.get(i), i); } // Compute document frequencies, organized by selected term index (not original term index) int[] docsCounts = new int[m_selectedTerms.size()]; for (int i = 0; i < m_selectedTerms.size(); i++) { String term = m_selectedTerms.get(i); int termIndex = termIndices.get(term); int docsCount = 0; for (int z = 0; z < values; z++) { HashMap<Integer, Count> termCountsForClass = termCounts.get(z); Count count = termCountsForClass.get(termIndex); if (count != null) { docsCount += count.docCount; } } docsCounts[i] = docsCount; } return docsCounts; }
From source file:fcul.viegas.ml.learners.NetworkStreamLearningClassifierMapFunction.java
public InstanceStreamDTO map(InstanceStreamDTO instance) throws Exception { weka.core.Instance inst = instance.getInstance(); inst.setDataset(this.coreInstances); inst.setClassValue(inst.classValue()); inst = classifier.constructMappedInstance(inst); HoeffdingTree tree = (HoeffdingTree) classifier.getClassifier(); double[] classe = tree.distributionForInstance(inst); instance.setInstance(null);//from w ww . ja v a 2 s .c om //System.out.println("\t classe[0]: " + classe[0] + " classe[1]: " + classe[1]); if (classe[0] > classe[1]) { instance.setAssignedClassValueFromLearner(0.0d); } else { instance.setAssignedClassValueFromLearner(1.0d); } return instance; }
From source file:feature.InfoGainEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all * attributes that are numeric.//from w w w . ja v a2 s . c om * * @param data * set of instances serving as training data * @throws Exception * if the evaluator has not been generated successfully */ public double computeInfoGain(Instances data, int att) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute info gains m_InfoGains = new double[data.numAttributes()]; m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att]) - ContingencyTables.entropyConditionedOnRows(counts[att])); return m_InfoGains[att]; }
From source file:feature.InfoGainEval.java
License:Open Source License
public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else {//from ww w . j ava2s . co m NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute info gains m_InfoGains = new double[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i]) - ContingencyTables.entropyConditionedOnRows(counts[i])); } } }
From source file:filters.MauiFilter.java
License:Open Source License
/** * Converts an instance./* w w w. j a v a 2 s. c om*/ */ private FastVector convertInstance(Instance instance, boolean training) throws Exception { FastVector vector = new FastVector(); String fileName = instance.stringValue(fileNameAtt); if (debugMode) { System.err.println("-- Converting instance for document " + fileName); } // Get the key phrases for the document HashMap<String, Counter> hashKeyphrases = null; if (!instance.isMissing(keyphrasesAtt)) { String keyphrases = instance.stringValue(keyphrasesAtt); hashKeyphrases = getGivenKeyphrases(keyphrases); } // Get the document text String documentText = instance.stringValue(documentAtt); // Compute the candidate topics HashMap<String, Candidate> candidateList; if (allCandidates != null && allCandidates.containsKey(instance)) { candidateList = allCandidates.get(instance); } else { candidateList = getCandidates(documentText); } if (debugMode) { System.err.println(candidateList.size() + " candidates "); } // Set indices for key attributes int tfidfAttIndex = documentAtt + 2; int distAttIndex = documentAtt + 3; int probsAttIndex = documentAtt + numFeatures; int countPos = 0; int countNeg = 0; // Go through the phrases and convert them into instances for (Candidate candidate : candidateList.values()) { if (candidate.getFrequency() < minOccurFrequency) { continue; } String name = candidate.getName(); String orig = candidate.getBestFullForm(); if (!vocabularyName.equals("none")) { orig = candidate.getTitle(); } double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList); Instance inst = new Instance(instance.weight(), vals); inst.setDataset(classifierData); // Get probability of a phrase being key phrase double[] probs = classifier.distributionForInstance(inst); double prob = probs[0]; if (nominalClassValue) { prob = probs[1]; } // Compute attribute values for final instance double[] newInst = new double[instance.numAttributes() + numFeatures + 2]; int pos = 0; for (int i = 1; i < instance.numAttributes(); i++) { if (i == documentAtt) { // output of values for a given phrase: // Add phrase int index = outputFormatPeek().attribute(pos).addStringValue(name); newInst[pos++] = index; // Add original version if (orig != null) { index = outputFormatPeek().attribute(pos).addStringValue(orig); } else { index = outputFormatPeek().attribute(pos).addStringValue(name); } newInst[pos++] = index; // Add features newInst[pos++] = inst.value(tfIndex); newInst[pos++] = inst.value(idfIndex); newInst[pos++] = inst.value(tfidfIndex); newInst[pos++] = inst.value(firstOccurIndex); newInst[pos++] = inst.value(lastOccurIndex); newInst[pos++] = inst.value(spreadOccurIndex); newInst[pos++] = inst.value(domainKeyphIndex); newInst[pos++] = inst.value(lengthIndex); newInst[pos++] = inst.value(generalityIndex); newInst[pos++] = inst.value(nodeDegreeIndex); newInst[pos++] = inst.value(semRelIndex); newInst[pos++] = inst.value(wikipKeyphrIndex); newInst[pos++] = inst.value(invWikipFreqIndex); newInst[pos++] = inst.value(totalWikipKeyphrIndex); // Add probability probsAttIndex = pos; newInst[pos++] = prob; // Set rank to missing (computed below) newInst[pos++] = Instance.missingValue(); } else if (i == keyphrasesAtt) { newInst[pos++] = inst.classValue(); } else { newInst[pos++] = instance.value(i); } } Instance ins = new Instance(instance.weight(), newInst); ins.setDataset(outputFormatPeek()); vector.addElement(ins); if (inst.classValue() == 0) { countNeg++; } else { countPos++; } } if (debugMode) { System.err.println(countPos + " positive; " + countNeg + " negative instances"); } // Sort phrases according to their distance (stable sort) double[] vals = new double[vector.size()]; for (int i = 0; i < vals.length; i++) { vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex); } FastVector newVector = new FastVector(vector.size()); int[] sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Sort phrases according to their tfxidf value (stable sort) for (int i = 0; i < vals.length; i++) { vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex); } newVector = new FastVector(vector.size()); sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Sort phrases according to their probability (stable sort) for (int i = 0; i < vals.length; i++) { vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex); } newVector = new FastVector(vector.size()); sortedIndices = Utils.stableSort(vals); for (int i = 0; i < vals.length; i++) { newVector.addElement(vector.elementAt(sortedIndices[i])); } vector = newVector; // Compute rank of phrases. Check for subphrases that are ranked // lower than superphrases and assign probability -1 and set the // rank to Integer.MAX_VALUE int rank = 1; for (int i = 0; i < vals.length; i++) { Instance currentInstance = (Instance) vector.elementAt(i); // Short cut: if phrase very unlikely make rank very low and // continue if (Utils.grOrEq(vals[i], 1.0)) { currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE); continue; } // Otherwise look for super phrase starting with first phrase // in list that has same probability, TFxIDF value, and distance as // current phrase. We do this to catch all superphrases // that have same probability, TFxIDF value and distance as current // phrase. int startInd = i; while (startInd < vals.length) { Instance inst = (Instance) vector.elementAt(startInd); if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex)) || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex)) || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) { break; } startInd++; } currentInstance.setValue(probsAttIndex + 1, rank++); } return vector; }
From source file:functions.kernelPerceptron.java
License:Open Source License
@Override public void trainOnInstanceImpl(Instance inst) { //Init Perceptron if (this.reset == true) { this.reset = false; this.numberAttributes = inst.numAttributes(); this.numberClasses = inst.numClasses(); this.weightAttribute = new double[inst.numClasses()][inst.numAttributes()]; for (int i = 0; i < inst.numClasses(); i++) { for (int j = 0; j < inst.numAttributes(); j++) { weightAttribute[i][j] = 0.2 * this.classifierRandom.nextDouble() - 0.1; }//from w ww.j a va2 s . c o m } } double[] preds = new double[inst.numClasses()]; for (int i = 0; i < inst.numClasses(); i++) { preds[i] = prediction(inst, i); } double learningRatio = learningRatioOption.getValue(); int actualClass = (int) inst.classValue(); for (int i = 0; i < inst.numClasses(); i++) { double actual = (i == actualClass) ? 1.0 : 0.0; double delta = (actual - preds[i]) * preds[i] * (1 - preds[i]); for (int j = 0; j < inst.numAttributes() - 1; j++) { this.weightAttribute[i][j] += learningRatio * delta * inst.value(j); } //this.weightAttribute[i][inst.numAttributes() - 1] += learningRatio * delta; } }
From source file:GClass.EvaluationInternal.java
License:Open Source License
/** * Sets the class prior probabilities/*from w ww.j a va 2 s. c o m*/ * * @param train the training instances used to determine * the prior probabilities * @exception Exception if the class attribute of the instances is not * set */ public void setPriors(Instances train) throws Exception { if (!m_ClassIsNominal) { m_NumTrainClassVals = 0; m_TrainClassVals = null; m_TrainClassWeights = null; m_PriorErrorEstimator = null; m_ErrorEstimator = null; for (int i = 0; i < train.numInstances(); i++) { Instance currentInst = train.instance(i); if (!currentInst.classIsMissing()) { addNumericTrainClass(currentInst.classValue(), currentInst.weight()); } } } else { for (int i = 0; i < m_NumClasses; i++) { m_ClassPriors[i] = 1; } m_ClassPriorsSum = m_NumClasses; for (int i = 0; i < train.numInstances(); i++) { if (!train.instance(i).classIsMissing()) { m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight(); m_ClassPriorsSum += train.instance(i).weight(); } } } }