List of usage examples for weka.core.FastVector.addElement
public final void addElement(E element)
From source file:DocClassifier.java
private FastVector createTerms(File[] files) { try {//w w w . j a v a 2 s . c o m Set<String> termSet = new HashSet<String>(); for (File file : files) { BufferedReader reader = new BufferedReader(new FileReader(file)); Set<String> docTermSet = new HashSet<String>(); while (reader.ready()) { String line = reader.readLine(); String[] words = line.split(" "); for (String word : words) { Kelime[] kelimeler = this.zemberek.kelimeCozumle(word); if (kelimeler.length > 0) { String kok = kelimeler[0].kok().icerik(); docTermSet.add(kok); termSet.add(kok); } } } // DF for a doc for (String t : docTermSet) { Double freq = this.idfMap.get(t); this.idfMap.put(t, ((freq != null) ? (freq + 1) : 1)); } reader.close(); } //Remove some words like ve,veya,de,da,in from set termSet = PreProcesser.filterTermSet(termSet); //IDF Calculation for (String t : termSet) { Double df = this.idfMap.get(t); if (df != null) { this.idfMap.put(t, Math.log(files.length / df) / Math.log(2)); } else { this.idfMap.put(t, 0.0); } //System.out.println(t + ": " + df); } // Attribute creation //System.err.println("\nAttribute:"); FastVector terms = new FastVector(); for (String term : termSet) { terms.addElement(new Attribute(term)); // System.err.println(term + "-"); } // Class values are created Set<String> classSet = new HashSet<String>(); for (File file : files) { classSet.add(file.getName().substring(0, 3).toLowerCase()); } //System.err.println("\nClass:"); this.classValues = new FastVector(); for (String category : classSet) { this.classValues.addElement(category); // System.out.print(category + "-"); } terms.addElement(new Attribute(CLASS_ATTR_NAME, classValues)); return terms; } catch (FileNotFoundException ex) { Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:TextDirectoryLoader.java
License:Open Source License
/** * Determines and returns (if possible) the structure (internally the * header) of the data set as an empty set of instances. * * @return the structure of the data set as an empty * set of Instances// w w w . jav a 2 s. co m * @throws IOException if an error occurs */ public Instances getStructure() throws IOException { if (getDirectory() == null) { throw new IOException("No directory/source has been specified"); } // determine class labels, i.e., sub-dirs if (m_structure == null) { String directoryPath = getDirectory().getAbsolutePath(); FastVector atts = new FastVector(); FastVector classes = new FastVector(); File dir = new File(directoryPath); String[] subdirs = dir.list(); for (int i = 0; i < subdirs.length; i++) { File subdir = new File(directoryPath + File.separator + subdirs[i]); if (subdir.isDirectory()) classes.addElement(subdirs[i]); } atts.addElement(new Attribute("text", (FastVector) null)); if (m_OutputFilename) atts.addElement(new Attribute("filename", (FastVector) null)); // make sure that the name of the class attribute is unlikely to // clash with any attribute created via the StringToWordVector filter atts.addElement(new Attribute("@@class@@", classes)); String relName = directoryPath.replaceAll("/", "_"); relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_"); m_structure = new Instances(relName, atts, 0); m_structure.setClassIndex(m_structure.numAttributes() - 1); } return m_structure; }
From source file:TextDirectoryLoader.java
License:Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then the method does so before processing
 * the rest of the data set.
 *
 * <p>Every file found in each class sub-directory becomes one instance:
 * attribute 0 holds the file's text, attribute 1 (only when
 * {@code m_OutputFilename} is set) holds {@code subdir/filename}, and the
 * class attribute holds the index of the sub-directory. A file that cannot
 * be converted is reported on {@code System.err} and skipped.
 *
 * @return the instances object obtained from {@code getStructure()},
 *         populated with one instance per successfully read file
 * @throws IOException if there is no source or a class directory cannot be
 *                     listed
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    String directoryPath = getDirectory().getAbsolutePath();
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements()) {
        classes.addElement(enm.nextElement());
    }

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        if (files == null) {
            // File.list() returns null if the directory vanished or cannot
            // be read; surface that as the declared IOException, not an NPE.
            throw new IOException("Cannot list directory: " + subdir.getAbsolutePath());
        }
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug()) {
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);
                }

                double[] newInst = new double[m_OutputFilename ? 3 : 2];
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                BufferedReader is;
                if (m_charSet == null || m_charSet.length() == 0) {
                    // no charset configured: fall back to the platform default
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt)));
                } else {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet));
                }
                try {
                    StringBuilder txtStr = new StringBuilder();
                    int c;
                    while ((c = is.read()) != -1) {
                        txtStr.append((char) c);
                    }

                    newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                    if (m_OutputFilename) {
                        newInst[1] = (double) data.attribute(1)
                                .addStringValue(subdirPath + File.separator + files[j]);
                    }
                    newInst[data.classIndex()] = (double) k;
                    data.add(new Instance(1.0, newInst));
                } finally {
                    // close the file even when reading or conversion fails
                    is.close();
                }
            } catch (Exception e) {
                // best effort: report the file and continue with the next one
                System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
                        + File.separator + files[j]);
            }
        }
    }

    return data;
}
From source file:ArrayLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set.//from w ww. jav a2s. c om * * @return the structure of the data set as an empty set of Instances * @exception IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (m_data == null) { throw new IOException("No source has been specified"); } if (m_structure == null) { getStructure(); } m_cumulativeStructure = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { m_cumulativeStructure.addElement(new Hashtable()); } m_cumulativeInstances = new FastVector(); FastVector current; for (int i = 0; i < m_data.length; i++) { current = getInstance(m_data[i]); m_cumulativeInstances.addElement(current); } FastVector atts = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { String attname = m_structure.attribute(i).name(); Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i)); if (tempHash.size() == 0) { atts.addElement(new Attribute(attname)); } else { if (m_StringAttributes.isInRange(i)) { atts.addElement(new Attribute(attname, (FastVector) null)); } else { FastVector values = new FastVector(tempHash.size()); // add dummy objects in order to make the FastVector's size == capacity for (int z = 0; z < tempHash.size(); z++) { values.addElement("dummy"); } Enumeration e = tempHash.keys(); while (e.hasMoreElements()) { Object ob = e.nextElement(); // if (ob instanceof Double) { int index = ((Integer) tempHash.get(ob)).intValue(); String s = ob.toString(); if (s.startsWith("'") || s.startsWith("\"")) s = s.substring(1, s.length() - 1); values.setElementAt(new String(s), index); // } } atts.addElement(new Attribute(attname, values)); } } } // make the instances String relationName; relationName = "ArrayData"; Instances dataSet = new 
Instances(relationName, atts, m_cumulativeInstances.size()); for (int i = 0; i < m_cumulativeInstances.size(); i++) { current = ((FastVector) m_cumulativeInstances.elementAt(i)); double[] vals = new double[dataSet.numAttributes()]; for (int j = 0; j < current.size(); j++) { Object cval = current.elementAt(j); if (cval instanceof String) { if (((String) cval).compareTo(m_MissingValue) == 0) { vals[j] = Instance.missingValue(); } else { if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue((String) cval); } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else { throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!"); } } } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue("" + cval); } else { vals[j] = ((Double) cval).doubleValue(); } } dataSet.add(new Instance(1.0, vals)); } m_structure = new Instances(dataSet, 0); m_cumulativeStructure = null; // conserve memory return dataSet; }
From source file:ArrayLoader.java
License:Open Source License
/** * Attempts to parse a line of the data set. * * @param tokenizer the tokenizer/*from ww w .j a va2s. c o m*/ * @return a FastVector containg String and Double objects representing * the values of the instance. * @exception IOException if an error occurs * * <pre><jml> * private_normal_behavior * requires: tokenizer != null; * ensures: \result != null; * also * private_exceptional_behavior * requires: tokenizer == null * || (* unsucessful parse *); * signals: (IOException); * </jml></pre> */ private FastVector getInstance(String[] data) throws IOException { FastVector current = new FastVector(); for (int i = 0; i < data.length; i++) { if (data[i].equals(m_MissingValue)) { current.addElement(new String(m_MissingValue)); } else { // try to parse as a number try { double val = Double.valueOf(data[i]).doubleValue(); current.addElement(new Double(val)); } catch (NumberFormatException e) { // otherwise assume its an enumerated value current.addElement(new String(data[i])); } } } // check number of values read if (current.size() != m_structure.numAttributes()) { System.out.println("Incorrect Structure"); } // check for structure update try { checkStructure(current); } catch (Exception ex) { ex.printStackTrace(); } return current; }
From source file:ArrayLoader.java
License:Open Source License
/** * Assumes the first line of the file contains the attribute names. * Assumes all attributes are real (Reading the full data set with * getDataSet will establish the true structure). * *//*from w w w . j ava 2s .com*/ private void readHeader(String[] column) throws IOException { FastVector attribNames = new FastVector(); // Assume first row of data are the column titles for (int i = 0; i < column.length; i++) { attribNames.addElement(new Attribute(column[i])); } m_structure = new Instances("DataArray", attribNames, 0); }
From source file:LabeledItemSet.java
License:Open Source License
/**
 * Filters out all item sets whose rule support lies outside the given
 * bounds. The input vector is not modified; surviving item sets keep their
 * relative order.
 *
 * @param itemSets the set of item sets to be pruned
 * @param minSupport the minimum number of transactions to be covered
 * @param maxSupport the maximum support
 * @return the reduced set of item sets
 */
public static FastVector deleteItemSets(FastVector itemSets, int minSupport, int maxSupport) {
    final int total = itemSets.size();
    FastVector kept = new FastVector(total);

    for (int idx = 0; idx < total; idx++) {
        LabeledItemSet candidate = (LabeledItemSet) itemSets.elementAt(idx);
        boolean withinBounds = (candidate.m_ruleSupCounter >= minSupport)
                && (candidate.m_ruleSupCounter <= maxSupport);
        if (withinBounds) {
            kept.addElement(candidate);
        }
    }
    return kept;
}
From source file:LabeledItemSet.java
License:Open Source License
/**
 * Merges all item sets in the set of (k-1)-item sets
 * to create the (k)-item sets and updates the counters.
 *
 * <p>Only item sets carrying the same class label are merged. Two item sets
 * are merged when their first {@code size} set items (positions whose value
 * is not -1) coincide and, in the remaining positions, they never both have
 * an item at the same position. Every generated item set starts with both
 * counters reset to 0.
 *
 * @param itemSets the set of (k-1)-item sets
 * @param size the value of (k-1)
 * @param totalTrans the total number of transactions
 * @return the generated (k)-item sets
 */
public static FastVector mergeAllItemSets(FastVector itemSets, int size, int totalTrans) {
    FastVector newVector = new FastVector();
    LabeledItemSet result;
    int numFound, k;

    for (int i = 0; i < itemSets.size(); i++) {
        LabeledItemSet first = (LabeledItemSet) itemSets.elementAt(i);
        // "out" labels the inner loop: "break out" abandons every remaining
        // partner for the current first item set.
        out: for (int j = i + 1; j < itemSets.size(); j++) {
            LabeledItemSet second = (LabeledItemSet) itemSets.elementAt(j);
            // Advance j past partners with a different class label.
            while (first.m_classLabel != second.m_classLabel) {
                j++;
                if (j == itemSets.size())
                    break out;
                second = (LabeledItemSet) itemSets.elementAt(j);
            }
            result = new LabeledItemSet(totalTrans, first.m_classLabel);
            result.m_items = new int[first.m_items.length];

            // Find and copy common prefix of size 'size'
            numFound = 0;
            k = 0;
            while (numFound < size) {
                if (first.m_items[k] == second.m_items[k]) {
                    if (first.m_items[k] != -1)
                        numFound++;
                    result.m_items[k] = first.m_items[k];
                } else
                    // Prefix mismatch: later partners are skipped as well.
                    // NOTE(review): presumably relies on itemSets being
                    // ordered so that equal prefixes are adjacent - confirm.
                    break out;
                k++;
            }

            // Check difference: stop early if both sets have an item at the
            // same position, otherwise take whichever side has one.
            while (k < first.m_items.length) {
                if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
                    break;
                else {
                    if (first.m_items[k] != -1)
                        result.m_items[k] = first.m_items[k];
                    else
                        result.m_items[k] = second.m_items[k];
                }
                k++;
            }
            // Keep the merged set only if the scan reached the end.
            if (k == first.m_items.length) {
                result.m_ruleSupCounter = 0;
                result.m_counter = 0;
                newVector.addElement(result);
            }
        }
    }

    return newVector;
}
From source file:LabeledItemSet.java
License:Open Source License
/** * Prunes a set of (k)-item sets using the given (k-1)-item sets. * * @param toPrune the set of (k)-item sets to be pruned * @param kMinusOne the (k-1)-item sets to be used for pruning * @return the pruned set of item sets/* w w w . j a va2s.co m*/ */ public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) { FastVector newVector = new FastVector(toPrune.size()); int help, j; for (int i = 0; i < toPrune.size(); i++) { LabeledItemSet current = (LabeledItemSet) toPrune.elementAt(i); for (j = 0; j < current.m_items.length; j++) { if (current.m_items[j] != -1) { help = current.m_items[j]; current.m_items[j] = -1; if (kMinusOne.get(current) != null && (current.m_classLabel == (((Integer) kMinusOne.get(current)).intValue()))) current.m_items[j] = help; else { current.m_items[j] = help; break; } } } if (j == current.m_items.length) newVector.addElement(current); } return newVector; }
From source file:LabeledItemSet.java
License:Open Source License
/** * Generates rules out of item sets/*w ww .j a v a 2 s . c o m*/ * @param minConfidence the minimum confidence * @param noPrune flag indicating whether the rules are pruned accoridng to the minimum confidence value * @return a set of rules */ public final FastVector[] generateRules(double minConfidence, boolean noPrune) { FastVector premises = new FastVector(), consequences = new FastVector(), conf = new FastVector(); FastVector[] rules = new FastVector[3]; ItemSet premise, consequence; // Generate all rules with class in the consequence. premise = new ItemSet(m_totalTransactions); consequence = new ItemSet(m_totalTransactions); int[] premiseItems = new int[m_items.length]; int[] consequenceItems = new int[1]; System.arraycopy(m_items, 0, premiseItems, 0, m_items.length); consequence.setItem(consequenceItems); premise.setItem(premiseItems); consequence.setItemAt(m_classLabel, 0); consequence.setCounter(this.m_ruleSupCounter); premise.setCounter(this.m_counter); premises.addElement(premise); consequences.addElement(consequence); conf.addElement(new Double((double) this.m_ruleSupCounter / (double) this.m_counter)); rules[0] = premises; rules[1] = consequences; rules[2] = conf; if (!noPrune) pruneRules(rules, minConfidence); return rules; }