List of usage examples for weka.core.Instance.value
public double value(Attribute att);
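Before the project examples below, here is a minimal self-contained sketch of the method itself. It uses the Weka 3.6-era API that most of these snippets use (FastVector and the concrete Instance class; Weka 3.7+ replaced these with java.util.List and DenseInstance). The class name ValueDemo, the attribute names, and all values are made up for illustration. Note that value(Attribute) returns the raw stored double: for a nominal attribute that is the index of the label, not the label itself.

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class ValueDemo {
    public static void main(String[] args) {
        // Dataset header: one numeric attribute and one nominal class attribute
        FastVector labels = new FastVector();
        labels.addElement("no");
        labels.addElement("yes");
        Attribute length = new Attribute("length");     // numeric
        Attribute cls = new Attribute("class", labels); // nominal
        FastVector atts = new FastVector();
        atts.addElement(length);
        atts.addElement(cls);
        Instances data = new Instances("demo", atts, 1);

        // One instance: length = 4.2, class = "yes"
        Instance ins = new Instance(2);
        ins.setDataset(data); // attach the header so string/class lookups work
        ins.setValue(length, 4.2);
        ins.setValue(cls, "yes");
        data.add(ins);

        System.out.println(ins.value(length));            // 4.2
        System.out.println(ins.value(cls));               // 1.0, the index of "yes"
        System.out.println(ins.value(data.attribute(0))); // same as value(length)
    }
}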
From source file: cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License: Open Source License

private void writePerfstoFile(Instance ins) {
    File perfFolder = new File(perfsfilepath);
    if (!perfFolder.exists())
        perfFolder.mkdirs();
    File file = new File(perfsfilepath + "/" + getMD5(ins));
    BufferedWriter writer;
    try {
        writer = new BufferedWriter(new FileWriter(file));
        writer.write(ins.value(ins.attribute(ins.numAttributes() - 1)) + "\n");
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file: cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License: Open Source License

public Instances runExp(Instances samplePoints, String perfAttName) {
    Instances retVal = null;
    if (samplePoints.attribute(perfAttName) == null) {
        Attribute performance = new Attribute(perfAttName);
        samplePoints.insertAttributeAt(performance, samplePoints.numAttributes());
    }
    int pos = samplePoints.numInstances();
    int count = 0;
    for (int i = 0; i < pos; i++) {
        Instance ins = samplePoints.get(i);
        HashMap hm = new HashMap();
        int tot = 0;
        for (int j = 0; j < ins.numAttributes(); j++) {
            hm.put(ins.attribute(j).name(), ins.value(ins.attribute(j)));
        }
        boolean testRet;
        if (Double.isNaN(ins.value(ins.attribute(ins.numAttributes() - 1)))) {
            testRet = this.startTest(hm, i, isInterrupt);
            double y = 0;
            if (!testRet) { // the setting does not work, we skip it
                y = -1;
                count++;
                if (count >= targetTestErrorNum) {
                    System.out.println(
                            "There must be something wrong with the system. Please check and restart.....");
                    System.exit(1);
                }
            } else {
                y = getPerformanceByType(performanceType);
                count = 0;
            }
            ins.setValue(samplePoints.numAttributes() - 1, y);
            writePerfstoFile(ins);
        } else {
            continue;
        }
    }
    retVal = samplePoints;
    retVal.setClassIndex(retVal.numAttributes() - 1);
    return retVal;
}
From source file: cn.ict.zyq.bestConf.COMT2.COMT2.java
License: Open Source License

private static Instances getSiblings(M5P modelTree, Instance ins) {
    RuleNode node = modelTree.getM5RootNode();
    while (!node.isLeaf()) {
        if (ins.value(node.splitAtt()) <= node.splitVal()) {
            node = node.leftNode();
        } else {
            node = node.rightNode();
        }
    }
    return node.zyqGetTrainingSet();
}
From source file: cn.ict.zyq.bestConf.COMT2.COMT2.java
License: Open Source License

private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception {
    double retval = 0., y;
    Enumeration<Instance> enu = omega.enumerateInstances();
    int idxClass = omega.classIndex();
    Instance ins;
    while (enu.hasMoreElements()) {
        ins = enu.nextElement();
        y = ins.value(idxClass);
        retval += Math.pow(y - model.classifyInstance(ins), 2)
                - Math.pow(y - modelPi.classifyInstance(ins), 2);
    }
    return retval;
}
From source file: com.actelion.research.orbit.imageAnalysis.models.ThresholdClassifier.java
License: Open Source License

@Override
public double classifyInstance(Instance instance) throws Exception {
    if (instance == null)
        throw new IllegalArgumentException("instance cannot be null");
    // System.out.println(threshs.length + " threshs: " + Arrays.toString(threshs));
    // System.out.println(instance.numAttributes() + " attributes: " + Arrays.toString(instance.toDoubleArray()));
    if (mins == null || ((instance.numAttributes() - 1) != mins.length))
        throw new IllegalStateException("thresholds length is not equal to feature length (mins="
                + ((mins == null) ? "null" : mins.length) + " attributes-1="
                + (instance.numAttributes() - 1) + ")");
    int yes = 0, no = 0;
    // -1 because the last attribute is the (missing) class value
    for (int a = 0; a < instance.numAttributes() - 1; a++) {
        if (Double.isNaN(mins[a]) || Double.isNaN(maxs[a]))
            continue; // NaN means this dimension should be ignored
        if (instance.value(a) >= mins[a] && instance.value(a) <= maxs[a])
            yes++;
        else
            no++;
    }
    // majority vote
    if (yes > no)
        return 1;
    else
        return 0;
}
From source file: com.entopix.maui.filters.MauiFilter.java
License: Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) {
    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);
    if (debugMode) {
        log.info("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;
    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        log.info(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {
        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }
        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }
        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);
        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(classifierData);
        double[] probs = null;
        try {
            // Get the probability of the phrase being a key phrase
            probs = classifier.distributionForInstance(inst);
        } catch (Exception e) {
            log.error("Exception while getting probability for candidate " + candidate.getName());
            continue;
        }
        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for the final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];
        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {
            if (i == documentAtt) {
                // Output of values for a given phrase:
                // 0 Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;
                // 1 Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }
                // 2
                newInst[pos++] = index;
                // Add features
                newInst[pos++] = inst.value(tfIndex); // 3
                newInst[pos++] = inst.value(idfIndex); // 4
                newInst[pos++] = inst.value(tfidfIndex); // 5
                newInst[pos++] = inst.value(firstOccurIndex); // 6
                newInst[pos++] = inst.value(lastOccurIndex); // 7
                newInst[pos++] = inst.value(spreadOccurIndex); // 8
                newInst[pos++] = inst.value(domainKeyphIndex); // 9
                newInst[pos++] = inst.value(lengthIndex); // 10
                newInst[pos++] = inst.value(generalityIndex); // 11
                newInst[pos++] = inst.value(nodeDegreeIndex); // 12
                newInst[pos++] = inst.value(invWikipFreqIndex); // 13
                newInst[pos++] = inst.value(totalWikipKeyphrIndex); // 14
                newInst[pos++] = inst.value(wikipGeneralityIndex); // 15
                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob; // 16
                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue(); // 17
            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        log.info(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their TFxIDF value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute the rank of each phrase. Check for subphrases that are ranked
    // lower than superphrases; assign them probability -1 and rank Integer.MAX_VALUE.
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // log.info(vals[i] + "\t" + currentInstance);
        // Shortcut: if the phrase is very unlikely, make its rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }
        // Otherwise look for a superphrase, starting with the first phrase in the
        // list that has the same probability, TFxIDF value, and distance as the
        // current phrase. This catches all superphrases that share these values.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);
    }
    return vector;
}
From source file: com.esda.util.StringToWordVector.java
License: Open Source License

/**
 * Converts the instance without document normalization.
 *
 * @param instance the instance to convert
 * @param v the vector the converted instance is added to
 * @return the number of attributes copied directly to the output
 *         (the index at which the word features start)
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING
                    && getInputFormat().attribute(i).type() != Attribute.RELATIONAL) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Double.NaN));
                } else if (getInputFormat().attribute(i).type() == Attribute.STRING) {
                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                } else {
                    // relational
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        Instances relationalHeader = outputFormatPeek().attribute(firstCopy).relation();
                        // hack to defeat the sparse instances bug
                        outputFormatPeek().attribute(firstCopy).addRelation(relationalHeader);
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addRelation(instance.relationalValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    for (int j = 0; j < instance.numAttributes(); j++) {
        // if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate "if" here rather than two lines down
                        // to avoid a hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing the TF transform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // Doing the IDF transform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to the structures needed to create a sparse instance
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }
    Instance inst = new SparseInstance(instance.weight(), values, indices,
            outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
From source file: com.github.polarisation.kea.main.KEAKeyphraseExtractor.java
License: Open Source License

/**
 * Extracts keyphrases from the documents.
 */
public void extractKeyphrases(Hashtable stems) throws Exception {
    Vector stats = new Vector();

    // Check whether there is actually any data,
    // i.e. whether there are any files in the directory
    if (stems.size() == 0) {
        throw new Exception("Couldn't find any data!");
    }
    m_KEAFilter.setNumPhrases(m_numPhrases);
    m_KEAFilter.setVocabulary(m_vocabulary);
    m_KEAFilter.setVocabularyFormat(m_vocabularyFormat);
    m_KEAFilter.setDocumentLanguage(getDocumentLanguage());
    m_KEAFilter.setStemmer(m_Stemmer);
    m_KEAFilter.setStopwords(m_Stopwords);
    if (getVocabulary().equals("none")) {
        m_KEAFilter.m_NODEfeature = false;
    } else {
        m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords);
    }
    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    atts.addElement(new Attribute("filename", (String) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);
    if (m_KEAFilter.m_Dictionary == null) {
        buildGlobalDictionaries(stems);
    }
    System.err.println("-- Extracting Keyphrases... ");

    // Extract keyphrases
    Enumeration elem = stems.keys();
    // Enumeration over all files in the directory (now in the hash):
    while (elem.hasMoreElements()) {
        String str = (String) elem.nextElement();
        double[] newInst = new double[2];
        try {
            File txt = new File(m_dirName + "/" + str + ".txt");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(txt), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(txt));
            }
            StringBuffer txtStr = new StringBuffer();
            int c;
            while ((c = is.read()) != -1) {
                txtStr.append((char) c);
            }
            newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("Can't read document " + str + ".txt");
            }
            newInst[0] = Instance.missingValue();
        }
        try {
            File key = new File(m_dirName + "/" + str + ".key");
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(new FileInputStream(key), m_encoding);
            } else {
                is = new InputStreamReader(new FileInputStream(key));
            }
            StringBuffer keyStr = new StringBuffer();
            int c;
            // keyStr holds the keyphrases from the str.key file.
            // KEA assumes that these keyphrases were assigned by the author
            // and evaluates the extracted keyphrases against them.
            while ((c = is.read()) != -1) {
                keyStr.append((char) c);
            }
            newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString());
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("No existing keyphrases for stem " + str + ".");
            }
            newInst[1] = Instance.missingValue();
        }
        data.add(new Instance(1.0, newInst));
        m_KEAFilter.input(data.instance(0));
        data = data.stringFreeStructure();
        if (m_debug) {
            System.err.println("-- Document: " + str);
        }
        Instance[] topRankedInstances = new Instance[m_numPhrases];
        Instance inst;
        // Iterate over all extracted keyphrases (inst)
        while ((inst = m_KEAFilter.output()) != null) {
            int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;
            if (index < m_numPhrases) {
                topRankedInstances[index] = inst;
            }
        }
        if (m_debug) {
            System.err.println("-- Keyphrases and feature values:");
        }
        FileOutputStream out = null;
        PrintWriter printer = null;
        File key = new File(m_dirName + "/" + str + ".key");
        if (!key.exists()) {
            out = new FileOutputStream(m_dirName + "/" + str + ".key");
            if (!m_encoding.equals("default")) {
                printer = new PrintWriter(new OutputStreamWriter(out, m_encoding));
            } else {
                printer = new PrintWriter(out);
            }
        }
        double numExtracted = 0, numCorrect = 0;
        for (int i = 0; i < m_numPhrases; i++) {
            if (topRankedInstances[i] != null) {
                if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
                    numExtracted += 1.0;
                }
                if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
                    numCorrect += 1.0;
                }
                if (printer != null) {
                    printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex()));
                    if (m_AdditionalInfo) {
                        printer.print("\t");
                        printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex()));
                        printer.print("\t");
                        printer.print(Utils.doubleToString(
                                topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4));
                    }
                    printer.println();
                }
                if (m_debug) {
                    System.err.println(topRankedInstances[i]);
                }
            }
        }
        if (numExtracted > 0) {
            if (m_debug) {
                System.err.println("-- " + numCorrect + " correct");
            }
            stats.addElement(new Double(numCorrect));
        }
        if (printer != null) {
            printer.flush();
            printer.close();
            out.close();
        }
    }
    double[] st = new double[stats.size()];
    for (int i = 0; i < stats.size(); i++) {
        st[i] = ((Double) stats.elementAt(i)).doubleValue();
    }
    double avg = Utils.mean(st);
    double stdDev = Math.sqrt(Utils.variance(st));
    System.err.println("Avg. number of matching keyphrases compared to existing ones: "
            + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2));
    System.err.println("Based on " + stats.size() + " documents");
    // m_KEAFilter.batchFinished();
}
From source file: com.mycompany.id3classifier.kNNClassifier.java

public static double getClassification(List<Instance> instances) {
    int index = instances.get(0).classIndex();
    HashMap<Double, Integer> counts = new HashMap<>();
    for (Instance instance : instances) {
        double val = instance.value(index);
        if (!counts.containsKey(val)) {
            counts.put(val, 1);
        } else {
            counts.put(val, counts.get(val) + 1);
        }
    }
    int maxCount = 0;
    double maxValue = 0;
    for (Entry<Double, Integer> entry : counts.entrySet()) {
        if (entry.getValue() > maxCount) {
            maxCount = entry.getValue();
            maxValue = entry.getKey();
        }
    }
    return maxValue;
}
From source file: com.mycompany.id3classifier.kNNClassifier.java

private static double findDistance(Instance instance1, Instance instance2) {
    double total = 0;
    int totalAttributes = instance1.numAttributes();
    for (int i = 0; i < totalAttributes; i++) {
        if (instance1.classIndex() == i)
            continue; // skip the class attribute
        double difference = 0;
        if (instance1.attribute(i).isNumeric()) {
            difference = Math.abs(instance1.value(i) - instance2.value(i));
        } else {
            if (!instance1.stringValue(i).equals(instance2.stringValue(i))) {
                difference = 1; // nominal mismatch counts as distance 1
            }
        }
        // Minkowski distance with p = totalAttributes (p = 2 would give Euclidean)
        total += Math.pow(difference, totalAttributes);
    }
    return Math.pow(total, 1.0 / totalAttributes);
}
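The excerpt does not show how kNNClassifier ties these two helpers together. Below is a minimal, hypothetical driver sketch, assuming it sits in the same class as the helpers above; the method name classify, the parameter k, and the Java 8 sort are illustrative assumptions, not part of the source (requires java.util.ArrayList, java.util.Comparator, java.util.List).

// Hypothetical driver, not part of the excerpted source:
// rank the training instances by findDistance to the query, keep the k
// nearest, and return the majority class via getClassification.
public static double classify(Instances train, Instance query, int k) {
    List<Instance> neighbors = new ArrayList<>();
    for (int i = 0; i < train.numInstances(); i++) {
        neighbors.add(train.instance(i));
    }
    neighbors.sort(Comparator.comparingDouble(t -> findDistance(t, query)));
    return getClassification(neighbors.subList(0, Math.min(k, neighbors.size())));
}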