List of usage examples for weka.core FastVector setElementAt
public final void setElementAt(E element, int index)
From source file:ArrayLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set./* w w w.j av a2s. co m*/ * * @return the structure of the data set as an empty set of Instances * @exception IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (m_data == null) { throw new IOException("No source has been specified"); } if (m_structure == null) { getStructure(); } m_cumulativeStructure = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { m_cumulativeStructure.addElement(new Hashtable()); } m_cumulativeInstances = new FastVector(); FastVector current; for (int i = 0; i < m_data.length; i++) { current = getInstance(m_data[i]); m_cumulativeInstances.addElement(current); } FastVector atts = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { String attname = m_structure.attribute(i).name(); Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i)); if (tempHash.size() == 0) { atts.addElement(new Attribute(attname)); } else { if (m_StringAttributes.isInRange(i)) { atts.addElement(new Attribute(attname, (FastVector) null)); } else { FastVector values = new FastVector(tempHash.size()); // add dummy objects in order to make the FastVector's size == capacity for (int z = 0; z < tempHash.size(); z++) { values.addElement("dummy"); } Enumeration e = tempHash.keys(); while (e.hasMoreElements()) { Object ob = e.nextElement(); // if (ob instanceof Double) { int index = ((Integer) tempHash.get(ob)).intValue(); String s = ob.toString(); if (s.startsWith("'") || s.startsWith("\"")) s = s.substring(1, s.length() - 1); values.setElementAt(new String(s), index); // } } atts.addElement(new Attribute(attname, values)); } } } // make the instances String relationName; relationName = "ArrayData"; Instances dataSet = new 
Instances(relationName, atts, m_cumulativeInstances.size()); for (int i = 0; i < m_cumulativeInstances.size(); i++) { current = ((FastVector) m_cumulativeInstances.elementAt(i)); double[] vals = new double[dataSet.numAttributes()]; for (int j = 0; j < current.size(); j++) { Object cval = current.elementAt(j); if (cval instanceof String) { if (((String) cval).compareTo(m_MissingValue) == 0) { vals[j] = Instance.missingValue(); } else { if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue((String) cval); } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else { throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!"); } } } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue("" + cval); } else { vals[j] = ((Double) cval).doubleValue(); } } dataSet.add(new Instance(1.0, vals)); } m_structure = new Instances(dataSet, 0); m_cumulativeStructure = null; // conserve memory return dataSet; }
From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java
License:Open Source License
/**
 * Create a copy of the structure that is safe to reuse: string attributes are
 * replaced with empty ("cleansed") string attributes that carry no references
 * to previously seen values, and relational attributes get an empty relation
 * header. Non-string, non-relational attributes are shared with this object.
 *
 * @return a copy of the instance structure with string/relational attributes cleansed
 */
public ProcessInstances stringFreeStructure() {
    // Collect replacement attributes for every STRING/RELATIONAL column,
    // each remembering the index it must be written back to.
    FastVector replacements = new FastVector();
    int numAtts = m_Attributes.size();
    for (int idx = 0; idx < numAtts; idx++) {
        Attribute original = (Attribute) m_Attributes.elementAt(idx);
        switch (original.type()) {
        case Attribute.STRING:
            // (FastVector) null => fresh string attribute with an empty value table
            replacements.addElement(new Attribute(original.name(), (FastVector) null, idx));
            break;
        case Attribute.RELATIONAL:
            replacements.addElement(new Attribute(original.name(),
                    new ProcessInstances((ProcessInstances) original.relation(), 0), idx));
            break;
        default:
            break; // nothing to cleanse
        }
    }

    // No string/relational attributes: an ordinary empty copy suffices.
    if (replacements.size() == 0) {
        return new ProcessInstances(this, 0);
    }

    // Splice the replacements into a copy of the attribute vector.
    FastVector cleansed = (FastVector) m_Attributes.copy();
    for (int k = 0; k < replacements.size(); k++) {
        Attribute fresh = (Attribute) replacements.elementAt(k);
        cleansed.setElementAt(fresh, fresh.index());
    }

    ProcessInstances copy = new ProcessInstances(this, 0);
    copy.m_Attributes = cleansed;
    return copy;
}
From source file:kea.KEAFilter.java
License:Open Source License
/**
 * Expects an empty hashtable. Fills the hashtable with the stemmed n-grams
 * occurring in the given string (as keys). For each n-gram it stores the
 * position of first occurrence, the number of occurrences, and the most
 * commonly occurring original version.
 *
 * Each hash value is a FastVector laid out as:
 *   [0] Counter — position of the phrase's first occurrence,
 *   [1] Counter — number of additional occurrences,
 *   [2] HashMap — original (unstemmed) versions -> occurrence Counter,
 *       later replaced by the single canonical-form String.
 *
 * N-grams that occur less than m_MinNumOccur times are removed.
 *
 * @param hash empty map to fill (stemmed phrase -> info FastVector)
 * @param str  input text; newline-separated "phrases", space-separated words
 * @return the total number of words (!) in the string
 */
private int getPhrases(HashMap hash, String str) {
    // Sliding window over the last m_MaxPhraseLength words.
    String[] buffer = new String[m_MaxPhraseLength];
    StringTokenizer tok = new StringTokenizer(str, "\n");
    int pos = 1; // running word position (1-based)
    while (tok.hasMoreTokens()) {
        String phrase = tok.nextToken();
        int numSeen = 0; // words currently buffered (capped at window size)
        StringTokenizer wordTok = new StringTokenizer(phrase, " ");
        while (wordTok.hasMoreTokens()) {
            String word = wordTok.nextToken();
            // Store word in buffer: shift window left, append at the end.
            for (int i = 0; i < m_MaxPhraseLength - 1; i++) {
                buffer[i] = buffer[i + 1];
            }
            buffer[m_MaxPhraseLength - 1] = word;
            // How many are buffered?
            numSeen++;
            if (numSeen > m_MaxPhraseLength) {
                numSeen = m_MaxPhraseLength;
            }
            // Don't consider phrases that end with a stop word
            if (m_Stopwords.isStopword(buffer[m_MaxPhraseLength - 1])) {
                pos++;
                continue;
            }
            // Loop through buffer and add phrases to hashtable.
            // phraseBuffer grows leftwards: iteration i covers the i-gram
            // ending at the current word.
            StringBuffer phraseBuffer = new StringBuffer();
            for (int i = 1; i <= numSeen; i++) {
                if (i > 1) {
                    phraseBuffer.insert(0, ' ');
                }
                phraseBuffer.insert(0, buffer[m_MaxPhraseLength - i]);
                // Don't consider phrases that begin with a stop word
                if ((i > 1) && (m_Stopwords.isStopword(buffer[m_MaxPhraseLength - i]))) {
                    continue;
                }
                // Only consider phrases with minimum length
                if (i >= m_MinPhraseLength) {
                    // Stem string
                    String phrStr = phraseBuffer.toString();
                    String internal = internalFormat(phrStr);
                    FastVector vec = (FastVector) hash.get(internal);
                    if (vec == null) {
                        // First sighting of this stemmed phrase.
                        vec = new FastVector(3);
                        // HashMap for storing all versions
                        HashMap secHash = new HashMap();
                        secHash.put(phrStr, new Counter());
                        // Update hashtable with all the info:
                        // pos + 1 - i = position of the phrase's first word.
                        vec.addElement(new Counter(pos + 1 - i));
                        vec.addElement(new Counter());
                        vec.addElement(secHash);
                        hash.put(internal, vec);
                    } else {
                        // Update number of occurrences
                        ((Counter) ((FastVector) vec).elementAt(1)).increment();
                        // Update hashtable storing different versions
                        HashMap secHash = (HashMap) vec.elementAt(2);
                        Counter count = (Counter) secHash.get(phrStr);
                        if (count == null) {
                            secHash.put(phrStr, new Counter());
                        } else {
                            count.increment();
                        }
                    }
                }
            }
            pos++;
        }
    }
    // Replace secondary hashtables with most commonly occurring
    // version of each phrase (canonical) form. Delete all words
    // that are proper nouns.
    Iterator phrases = hash.keySet().iterator();
    while (phrases.hasNext()) {
        String phrase = (String) phrases.next();
        FastVector info = (FastVector) hash.get(phrase);
        // Occurring less than m_MinNumOccur? Drop it.
        if (((Counter) ((FastVector) info).elementAt(1)).value() < m_MinNumOccur) {
            phrases.remove(); // Iterator.remove is safe during iteration
            continue;
        }
        // Get canonical form; null means no acceptable version (e.g. proper noun).
        String canForm = canonicalForm((HashMap) info.elementAt(2));
        if (canForm == null) {
            phrases.remove();
        } else {
            info.setElementAt(canForm, 2);
        }
    }
    return pos;
}