Example usage for weka.core FastVector addElement

List of usage examples for weka.core FastVector addElement

Introduction

This page collects usage examples for the addElement method of weka.core.FastVector.

Prototype

public final void addElement(E element) 

Source Link

Document

Adds an element to this vector.

Usage

From source file:DocClassifier.java

/**
 * Builds the attribute list (one attribute per term plus the class attribute)
 * from the given documents and fills {@code idfMap} with per-term weights.
 *
 * <p>Each whitespace-separated word is passed to {@code zemberek.kelimeCozumle};
 * when at least one analysis is returned, the {@code kok().icerik()} of the
 * first one is used as the term (presumably the word's root/stem - confirm
 * against the Zemberek API). Document frequencies are accumulated in
 * {@code idfMap} and then replaced by {@code log2(files.length / df)}.
 *
 * <p>The class labels are the lower-cased first three characters of each file
 * name; they are stored in {@code classValues} and appended as the last
 * attribute.
 *
 * @param files the documents to read
 * @return the term attributes followed by the class attribute, or
 *         {@code null} if a file could not be opened or read (the failure is
 *         logged)
 */
private FastVector createTerms(File[] files) {
    try {
        Set<String> termSet = new HashSet<String>();
        for (File file : files) {
            Set<String> docTermSet = new HashSet<String>();
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                // readLine() returning null is the reliable end-of-file signal;
                // ready() only reports whether a read would block.
                String line;
                while ((line = reader.readLine()) != null) {
                    for (String word : line.split(" ")) {
                        Kelime[] kelimeler = this.zemberek.kelimeCozumle(word);
                        if (kelimeler.length > 0) {
                            String kok = kelimeler[0].kok().icerik();
                            docTermSet.add(kok);
                            termSet.add(kok);
                        }
                    }
                }
            } finally {
                // Always release the file handle, even when reading fails.
                reader.close();
            }
            // DF for a doc: each term counts once per document it occurs in
            for (String t : docTermSet) {
                Double freq = this.idfMap.get(t);
                this.idfMap.put(t, ((freq != null) ? (freq + 1) : 1));
            }
        }
        //Remove some words like ve,veya,de,da,in from set
        termSet = PreProcesser.filterTermSet(termSet);
        //IDF Calculation: log2(N / df), or 0 when no document frequency is known
        for (String t : termSet) {
            Double df = this.idfMap.get(t);
            if (df != null) {
                this.idfMap.put(t, Math.log(files.length / df) / Math.log(2));
            } else {
                this.idfMap.put(t, 0.0);
            }
        }
        // Attribute creation: one attribute per remaining term
        FastVector terms = new FastVector();
        for (String term : termSet) {
            terms.addElement(new Attribute(term));
        }
        // Class values are created from the first three characters of each file name
        Set<String> classSet = new HashSet<String>();
        for (File file : files) {
            classSet.add(file.getName().substring(0, 3).toLowerCase());
        }
        this.classValues = new FastVector();
        for (String category : classSet) {
            this.classValues.addElement(category);
        }
        terms.addElement(new Attribute(CLASS_ATTR_NAME, classValues));
        return terms;
    } catch (IOException ex) {
        // FileNotFoundException is an IOException, so one handler covers both cases.
        Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}

From source file:TextDirectoryLoader.java

License:Open Source License

/**
 * Determines and returns (if possible) the structure (internally the
 * header) of the data set as an empty set of instances.
 *
 * <p>The header is built once and cached in {@code m_structure}: every
 * sub-directory of the source directory becomes a class label, and the
 * attributes are "text", optionally "filename" (when
 * {@code m_OutputFilename} is set) and the class attribute "@@class@@".
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if no directory has been specified or the
 *                     directory's contents cannot be listed
 */
public Instances getStructure() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    // determine class labels, i.e., sub-dirs
    if (m_structure == null) {
        String directoryPath = getDirectory().getAbsolutePath();
        FastVector atts = new FastVector();
        FastVector classes = new FastVector();

        File dir = new File(directoryPath);
        String[] subdirs = dir.list();
        if (subdirs == null) {
            // File.list() returns null when the path is not a directory or an
            // I/O error occurs; report that instead of failing with an NPE.
            throw new IOException("Cannot list contents of directory: " + directoryPath);
        }

        for (int i = 0; i < subdirs.length; i++) {
            File subdir = new File(directoryPath + File.separator + subdirs[i]);
            if (subdir.isDirectory()) {
                classes.addElement(subdirs[i]);
            }
        }

        atts.addElement(new Attribute("text", (FastVector) null));
        if (m_OutputFilename) {
            atts.addElement(new Attribute("filename", (FastVector) null));
        }
        // make sure that the name of the class attribute is unlikely to
        // clash with any attribute created via the StringToWordVector filter
        atts.addElement(new Attribute("@@class@@", classes));

        // relation name: the directory path with '/', '\' and ':' replaced by '_'
        String relName = directoryPath.replaceAll("/", "_");
        relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
        m_structure = new Instances(relName, atts, 0);
        m_structure.setClassIndex(m_structure.numAttributes() - 1);
    }

    return m_structure;
}

From source file:TextDirectoryLoader.java

License:Open Source License

/**
 * Returns the full data set: one instance per file found in each class
 * sub-directory of the source directory. The structure is obtained via
 * {@link #getStructure()}, which determines it first if that has not
 * happened yet.
 *
 * <p>Files that cannot be read or converted are reported on
 * {@code System.err} and skipped; they do not abort the load.
 *
 * @return the data set containing an instance for every converted file
 * @throws IOException if no directory/source has been specified or the
 *                     structure cannot be determined
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    String directoryPath = getDirectory().getAbsolutePath();
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements()) {
        classes.addElement(enm.nextElement());
    }

    // NOTE(review): instances are added to the object returned by
    // getStructure(); if that is the cached header, repeated calls would
    // accumulate instances - confirm against callers.
    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        if (files == null) {
            // File.list() returns null when the path is not a directory or
            // cannot be read; skip it in the same best-effort spirit as
            // unreadable files.
            System.err.println("failed to list directory: " + directoryPath + File.separator + subdirPath);
            continue;
        }
        for (int j = 0; j < files.length; j++) {
            String relativePath = subdirPath + File.separator + files[j];
            try {
                fileCount++;
                if (getDebug()) {
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);
                }

                File txt = new File(directoryPath + File.separator + relativePath);
                StringBuilder txtStr = new StringBuilder();
                FileInputStream in = new FileInputStream(txt);
                try {
                    // Use the platform default charset unless one was configured.
                    BufferedReader is;
                    if (m_charSet == null || m_charSet.length() == 0) {
                        is = new BufferedReader(new InputStreamReader(in));
                    } else {
                        is = new BufferedReader(new InputStreamReader(in, m_charSet));
                    }
                    int c;
                    while ((c = is.read()) != -1) {
                        txtStr.append((char) c);
                    }
                } finally {
                    // Closing the underlying stream releases the file handle even
                    // when decoding or reading fails part-way.
                    in.close();
                }

                // layout: [text, (filename,) class]
                double[] newInst = new double[m_OutputFilename ? 3 : 2];
                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename) {
                    newInst[1] = (double) data.attribute(1).addStringValue(relativePath);
                }
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                // Best effort: report the file and the cause, then continue.
                System.err.println("failed to convert file: " + directoryPath + File.separator + relativePath
                        + " (" + e + ")");
            }
        }
    }

    return data;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then this method does so before processing
 * the rest of the data set.
 *
 * <p>Every row of {@code m_data} is parsed with {@code getInstance}, the
 * attribute list is then rebuilt from {@code m_cumulativeStructure}, and
 * finally one instance per parsed row is added to the result. On return
 * {@code m_structure} is replaced by an empty copy of the resulting header
 * and {@code m_cumulativeStructure} is cleared.
 *
 * @return the data set containing one instance per row of the source array
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }

    if (m_structure == null) {
        getStructure();
    }

    // One (initially empty) value table per attribute.
    // Presumably filled by checkStructure(), which getInstance() calls - not visible here.
    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }

    // Parse every row and keep the parsed values for the second pass below.
    m_cumulativeInstances = new FastVector();
    FastVector current;

    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);

        m_cumulativeInstances.addElement(current);
    }

    // Rebuild the attribute definitions from the collected value tables.
    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            // no labels recorded: name-only attribute (numeric in the Weka API - confirm)
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                // attribute index selected by m_StringAttributes: created with a null value list
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                // Place each label at the index stored for it in the table.
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    //     if (ob instanceof Double) {
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    // drop the first and last character when the label starts with a quote
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(new String(s), index);
                    //     }
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }

    // make the instances
    String relationName;
    relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    // the missing-value marker maps to Weka's missing value
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        // a String value in an attribute that is neither string nor nominal
                        // NOTE(review): the reported position is the row index i + 1, not the attribute index j
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                // non-String value in a string attribute: store its text form
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                // remaining case: the value is cast to Double and stored as-is
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }
    // keep an empty copy of the final header as the structure
    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory

    return dataSet;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Attempts to parse a line of the data set.
 *
 * @param tokenizer the tokenizer/*from  ww  w .j  a  va2s. c  o  m*/
 * @return a FastVector containg String and Double objects representing
 * the values of the instance.
 * @exception IOException if an error occurs
 *
 * <pre><jml>
 *    private_normal_behavior
 *      requires: tokenizer != null;
 *      ensures: \result  != null;
 *  also
 *    private_exceptional_behavior
 *      requires: tokenizer == null
 *                || (* unsucessful parse *);
 *      signals: (IOException);
 * </jml></pre>
 */
private FastVector getInstance(String[] data) throws IOException {

    FastVector current = new FastVector();

    for (int i = 0; i < data.length; i++) {
        if (data[i].equals(m_MissingValue)) {
            current.addElement(new String(m_MissingValue));
        } else {
            // try to parse as a number
            try {
                double val = Double.valueOf(data[i]).doubleValue();
                current.addElement(new Double(val));
            } catch (NumberFormatException e) {
                // otherwise assume its an enumerated value
                current.addElement(new String(data[i]));
            }
        }
    }

    // check number of values read
    if (current.size() != m_structure.numAttributes()) {
        System.out.println("Incorrect Structure");
    }

    // check for structure update
    try {
        checkStructure(current);
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    return current;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Initialises the structure from a row of column titles.
 *
 * Every title becomes an attribute created from its name alone; reading
 * the full data set with getDataSet will establish the true structure.
 * The resulting empty header is stored in {@code m_structure} under the
 * relation name "DataArray".
 *
 * @param column the column titles, one per attribute
 * @throws IOException declared for callers; not thrown by the visible code
 */
private void readHeader(String[] column) throws IOException {

    FastVector headerAttributes = new FastVector();

    // The first row of the data is taken to hold the column titles.
    for (String title : column) {
        headerAttributes.addElement(new Attribute(title));
    }

    m_structure = new Instances("DataArray", headerAttributes, 0);
}

From source file:LabeledItemSet.java

License:Open Source License

/**
 * Keeps only the item sets whose rule support lies within
 * [minSupport, maxSupport]; all others are dropped.
 *
 * @param itemSets the set of item sets to be pruned
 * @param minSupport the minimum number of transactions to be covered
 * @param maxSupport the maximum support
 * @return the reduced set of item sets, in their original order
 */
public static FastVector deleteItemSets(FastVector itemSets, int minSupport, int maxSupport) {

    FastVector survivors = new FastVector(itemSets.size());

    for (int idx = 0; idx < itemSets.size(); idx++) {
        LabeledItemSet candidate = (LabeledItemSet) itemSets.elementAt(idx);
        // skip anything below the minimum or above the maximum support
        if ((candidate.m_ruleSupCounter < minSupport) || (candidate.m_ruleSupCounter > maxSupport)) {
            continue;
        }
        survivors.addElement(candidate);
    }
    return survivors;
}

From source file:LabeledItemSet.java

License:Open Source License

/**
 * Merges all item sets in the set of (k-1)-item sets
 * to create the (k)-item sets and updates the counters.
 *
 * <p>Only item sets with the same class label are merged. Two item sets
 * are merged when their first {@code size} items agree and, in the
 * remaining positions, at most one of them has an item set (a value other
 * than -1). Each generated item set starts with both counters at zero.
 *
 * @return the generated (k)-item sets
 * @param totalTrans the total number of transactions
 * @param itemSets the set of (k-1)-item sets
 * @param size the value of (k-1)
 */
public static FastVector mergeAllItemSets(FastVector itemSets, int size, int totalTrans) {

    FastVector newVector = new FastVector();
    LabeledItemSet result;
    int numFound, k;

    for (int i = 0; i < itemSets.size(); i++) {
        LabeledItemSet first = (LabeledItemSet) itemSets.elementAt(i);
        // "out" labels the inner loop: "break out" abandons the search for
        // further partners of "first" and moves on to the next i.
        out: for (int j = i + 1; j < itemSets.size(); j++) {
            LabeledItemSet second = (LabeledItemSet) itemSets.elementAt(j);
            // Advance j past item sets with a different class label; the loop
            // index itself is moved, so skipped entries are not revisited.
            while (first.m_classLabel != second.m_classLabel) {
                j++;
                if (j == itemSets.size())
                    break out;
                second = (LabeledItemSet) itemSets.elementAt(j);
            }
            result = new LabeledItemSet(totalTrans, first.m_classLabel);
            result.m_items = new int[first.m_items.length];

            // Find and copy common prefix of size 'size'
            numFound = 0;
            k = 0;
            while (numFound < size) {
                if (first.m_items[k] == second.m_items[k]) {
                    // positions holding -1 in both sets are copied but not counted
                    if (first.m_items[k] != -1)
                        numFound++;
                    result.m_items[k] = first.m_items[k];
                } else
                    // prefix differs: stop looking for partners of "first"
                    // (presumably relies on the item sets being sorted - confirm)
                    break out;
                k++;
            }

            // Check difference
            while (k < first.m_items.length) {
                // both sets have an item at the same position: not mergeable
                if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
                    break;
                else {
                    // take whichever of the two has an item here (or -1 if neither)
                    if (first.m_items[k] != -1)
                        result.m_items[k] = first.m_items[k];
                    else
                        result.m_items[k] = second.m_items[k];
                }
                k++;
            }
            // the whole item array was traversed without a conflict: keep the merge
            if (k == first.m_items.length) {
                result.m_ruleSupCounter = 0;
                result.m_counter = 0;
                newVector.addElement(result);
            }
        }
    }

    return newVector;
}

From source file:LabeledItemSet.java

License:Open Source License

/**
* Prunes a set of (k)-item sets using the given (k-1)-item sets.
*
* @param toPrune the set of (k)-item sets to be pruned
* @param kMinusOne the (k-1)-item sets to be used for pruning
* @return the pruned set of item sets/*  w w w . j a va2s.co  m*/
*/
public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) {

    FastVector newVector = new FastVector(toPrune.size());
    int help, j;

    for (int i = 0; i < toPrune.size(); i++) {
        LabeledItemSet current = (LabeledItemSet) toPrune.elementAt(i);

        for (j = 0; j < current.m_items.length; j++) {
            if (current.m_items[j] != -1) {
                help = current.m_items[j];
                current.m_items[j] = -1;
                if (kMinusOne.get(current) != null
                        && (current.m_classLabel == (((Integer) kMinusOne.get(current)).intValue())))
                    current.m_items[j] = help;
                else {
                    current.m_items[j] = help;
                    break;
                }
            }
        }
        if (j == current.m_items.length)
            newVector.addElement(current);
    }
    return newVector;
}

From source file:LabeledItemSet.java

License:Open Source License

/**
 * Generates rules out of item sets/*w  ww  .j a  v  a  2 s  .  c  o m*/
 * @param minConfidence the minimum confidence
 * @param noPrune flag indicating whether the rules are pruned accoridng to the minimum confidence value
 * @return a set of rules
 */
public final FastVector[] generateRules(double minConfidence, boolean noPrune) {

    FastVector premises = new FastVector(), consequences = new FastVector(), conf = new FastVector();
    FastVector[] rules = new FastVector[3];
    ItemSet premise, consequence;

    // Generate all rules with class in the consequence. 
    premise = new ItemSet(m_totalTransactions);
    consequence = new ItemSet(m_totalTransactions);
    int[] premiseItems = new int[m_items.length];
    int[] consequenceItems = new int[1];
    System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
    consequence.setItem(consequenceItems);
    premise.setItem(premiseItems);
    consequence.setItemAt(m_classLabel, 0);
    consequence.setCounter(this.m_ruleSupCounter);
    premise.setCounter(this.m_counter);
    premises.addElement(premise);
    consequences.addElement(consequence);
    conf.addElement(new Double((double) this.m_ruleSupCounter / (double) this.m_counter));

    rules[0] = premises;
    rules[1] = consequences;
    rules[2] = conf;
    if (!noPrune)
        pruneRules(rules, minConfidence);

    return rules;
}