Example usage for weka.core FastVector FastVector

List of usage examples for weka.core FastVector FastVector

Introduction

On this page you can find an example usage of the weka.core FastVector no-argument constructor, FastVector().

Prototype

public FastVector() 

Source Link

Document

Constructs an empty vector with initial capacity zero.

Usage

From source file:DocClassifier.java

/**
 * Builds the attribute list (one numeric attribute per distinct stemmed term,
 * plus a nominal class attribute) from the given document files, and fills
 * {@code this.idfMap} with the IDF weight of each surviving term.
 *
 * Side effects: populates {@code this.idfMap} and {@code this.classValues}.
 *
 * @param files the document files; each file name's first three characters
 *              are taken as its class label
 * @return the attribute vector (terms followed by the class attribute),
 *         or null if an I/O error occurred
 */
private FastVector createTerms(File[] files) {
    try {
        Set<String> termSet = new HashSet<String>();
        for (File file : files) {
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                // Terms seen in THIS document only, so DF counts each doc once.
                Set<String> docTermSet = new HashSet<String>();
                while (reader.ready()) {
                    String line = reader.readLine();
                    String[] words = line.split(" ");
                    for (String word : words) {
                        // Morphological analysis (Turkish); keep the first
                        // analysis' stem ("kok" = root).
                        Kelime[] kelimeler = this.zemberek.kelimeCozumle(word);
                        if (kelimeler.length > 0) {
                            String kok = kelimeler[0].kok().icerik();
                            docTermSet.add(kok);
                            termSet.add(kok);
                        }
                    }
                }
                // Document frequency: +1 per document containing the term.
                for (String t : docTermSet) {
                    Double freq = this.idfMap.get(t);
                    this.idfMap.put(t, ((freq != null) ? (freq + 1) : 1));
                }
            } finally {
                // Close in finally so the reader is released even when
                // reading or stemming throws (the original leaked it here).
                reader.close();
            }
        }
        // Remove stop words like ve, veya, de, da, in from the set.
        termSet = PreProcesser.filterTermSet(termSet);
        // IDF calculation: log2(N / df); terms without a DF entry get 0.
        for (String t : termSet) {
            Double df = this.idfMap.get(t);
            if (df != null) {
                this.idfMap.put(t, Math.log(files.length / df) / Math.log(2));
            } else {
                this.idfMap.put(t, 0.0);
            }
        }
        // Attribute creation: one attribute per term.
        FastVector terms = new FastVector();
        for (String term : termSet) {
            terms.addElement(new Attribute(term));
        }
        // Class values: first three letters of each file name, lower-cased.
        Set<String> classSet = new HashSet<String>();
        for (File file : files) {
            classSet.add(file.getName().substring(0, 3).toLowerCase());
        }
        this.classValues = new FastVector();
        for (String category : classSet) {
            this.classValues.addElement(category);
        }
        // The class attribute goes last.
        terms.addElement(new Attribute(CLASS_ATTR_NAME, classValues));
        return terms;
    } catch (FileNotFoundException ex) {
        Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}

From source file:TextDirectoryLoader.java

License:Open Source License

/**
 * Determines and returns (if possible) the structure (internally the
 * header) of the data set as an empty set of instances.
 *
 * @return          the structure of the data set as an empty
 *             set of Instances
 * @throws IOException    if an error occurs
 */
public Instances getStructure() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    // determine class labels, i.e., sub-dirs; built lazily and cached
    if (m_structure == null) {
        String directoryPath = getDirectory().getAbsolutePath();
        FastVector atts = new FastVector();
        FastVector classes = new FastVector();

        File dir = new File(directoryPath);
        String[] subdirs = dir.list();
        // File.list() returns null if the path is not a directory or an
        // I/O error occurs -- the original would throw a NullPointerException
        if (subdirs == null) {
            throw new IOException("Cannot list directory: " + directoryPath);
        }

        for (int i = 0; i < subdirs.length; i++) {
            File subdir = new File(directoryPath + File.separator + subdirs[i]);
            if (subdir.isDirectory())
                classes.addElement(subdirs[i]);
        }

        // a null FastVector makes the attribute a string attribute
        atts.addElement(new Attribute("text", (FastVector) null));
        if (m_OutputFilename)
            atts.addElement(new Attribute("filename", (FastVector) null));
        // make sure that the name of the class attribute is unlikely to
        // clash with any attribute created via the StringToWordVector filter
        atts.addElement(new Attribute("@@class@@", classes));

        // sanitize the path so it is a legal relation name
        String relName = directoryPath.replaceAll("/", "_");
        relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
        m_structure = new Instances(relName, atts, 0);
        m_structure.setClassIndex(m_structure.numAttributes() - 1);
    }

    return m_structure;
}

From source file:TextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set./*  w  ww  .  j  a v  a2s .  co m*/
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.addElement(enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

                double[] newInst = null;
                if (m_OutputFilename)
                    newInst = new double[3];
                else
                    newInst = new double[2];
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                BufferedReader is;
                if (m_charSet == null || m_charSet.length() == 0) {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt)));
                } else {
                    is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet));
                }

                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename)
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
                        + File.separator + files[j]);
            }
        }
    }

    return data;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Attempts to parse a line of the data set.
 *
 * @param data the tokens of one row of the data set
 * @return a FastVector containing String and Double objects representing
 * the values of the instance.
 * @exception IOException if an error occurs
 *
 * <pre><jml>
 *    private_normal_behavior
 *      requires: data != null;
 *      ensures: \result  != null;
 *  also
 *    private_exceptional_behavior
 *      requires: data == null
 *                || (* unsuccessful parse *);
 *      signals: (IOException);
 * </jml></pre>
 */
private FastVector getInstance(String[] data) throws IOException {

    FastVector current = new FastVector();

    for (int i = 0; i < data.length; i++) {
        if (data[i].equals(m_MissingValue)) {
            // keep the missing-value marker; no need to copy the string
            current.addElement(m_MissingValue);
        } else {
            // try to parse as a number
            try {
                // Double.valueOf replaces the deprecated new Double(...)
                current.addElement(Double.valueOf(data[i]));
            } catch (NumberFormatException e) {
                // otherwise assume it's an enumerated (nominal) value
                current.addElement(data[i]);
            }
        }
    }

    // check number of values read against the known structure
    if (current.size() != m_structure.numAttributes()) {
        System.out.println("Incorrect Structure");
    }

    // check for structure update (e.g. a numeric column turning nominal)
    try {
        checkStructure(current);
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    return current;
}

From source file:ArrayLoader.java

License:Open Source License

/**
 * Assumes the first line of the file contains the attribute names.
 * Assumes all attributes are real (Reading the full data set with
 * getDataSet will establish the true structure).
 */
private void readHeader(String[] column) throws IOException {

    // The first row of the array supplies the attribute (column) titles.
    FastVector attributes = new FastVector();
    for (String title : column) {
        attributes.addElement(new Attribute(title));
    }

    // Empty Instances object carrying only the header information.
    m_structure = new Instances("DataArray", attributes, 0);
}

From source file:LabeledItemSet.java

License:Open Source License

/**
 * Merges all item sets in the set of (k-1)-item sets
 * to create the (k)-item sets and updates the counters.
 *
 * @param itemSets the set of (k-1)-item sets
 * @param size the value of (k-1)
 * @param totalTrans the total number of transactions
 * @return the generated (k)-item sets
 */
public static FastVector mergeAllItemSets(FastVector itemSets, int size, int totalTrans) {

    FastVector newVector = new FastVector();
    LabeledItemSet result;
    int numFound, k;

    for (int i = 0; i < itemSets.size(); i++) {
        LabeledItemSet first = (LabeledItemSet) itemSets.elementAt(i);
        // labeled loop: "break out" abandons all remaining partners for 'first'
        out: for (int j = i + 1; j < itemSets.size(); j++) {
            LabeledItemSet second = (LabeledItemSet) itemSets.elementAt(j);
            // skip candidates with a different class label; note this advances
            // the outer index j directly -- presumably the item sets are grouped
            // by class label so the skip is monotone (NOTE(review): confirm)
            while (first.m_classLabel != second.m_classLabel) {
                j++;
                if (j == itemSets.size())
                    break out;
                second = (LabeledItemSet) itemSets.elementAt(j);
            }
            result = new LabeledItemSet(totalTrans, first.m_classLabel);
            result.m_items = new int[first.m_items.length];

            // Find and copy common prefix of size 'size'; -1 marks an unused
            // attribute slot and does not count towards the prefix length
            numFound = 0;
            k = 0;
            while (numFound < size) {
                if (first.m_items[k] == second.m_items[k]) {
                    if (first.m_items[k] != -1)
                        numFound++;
                    result.m_items[k] = first.m_items[k];
                } else
                    break out; // prefixes differ: no later partner can match either
                k++;
            }

            // Check difference: beyond the shared prefix the two sets must not
            // both set the same attribute; copy whichever side is set
            while (k < first.m_items.length) {
                if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
                    break;
                else {
                    if (first.m_items[k] != -1)
                        result.m_items[k] = first.m_items[k];
                    else
                        result.m_items[k] = second.m_items[k];
                }
                k++;
            }
            // only keep the merge if the whole length was consumed without overlap
            if (k == first.m_items.length) {
                result.m_ruleSupCounter = 0;
                result.m_counter = 0;
                newVector.addElement(result);
            }
        }
    }

    return newVector;
}

From source file:LabeledItemSet.java

License:Open Source License

/**
 * Converts the header info of the given set of instances into a set
 * of item sets (singletons). The ordering of values in the header file
 * determines the lexicographic order. Each item set knows its class label.
 *
 * @param instancesNoClass instances without the class attribute
 * @param classes the values of the class attribute sorted according to instances
 * @return a set of item sets, each containing a single item
 * @exception Exception if singletons can't be generated successfully
 */
public static FastVector singletons(Instances instancesNoClass, Instances classes) throws Exception {

    FastVector setOfItemSets = new FastVector();
    int numAttributes = instancesNoClass.numAttributes();
    int numClassValues = classes.attribute(0).numValues();

    // One singleton per (attribute, value, class-label) combination.
    for (int attr = 0; attr < numAttributes; attr++) {
        if (instancesNoClass.attribute(attr).isNumeric())
            throw new Exception("Can't handle numeric attributes!");
        int numValues = instancesNoClass.attribute(attr).numValues();
        for (int val = 0; val < numValues; val++) {
            for (int label = 0; label < numClassValues; label++) {
                LabeledItemSet singleton = new LabeledItemSet(instancesNoClass.numInstances(), label);
                // -1 marks an unused attribute slot; only 'attr' is set.
                singleton.m_items = new int[numAttributes];
                for (int slot = 0; slot < numAttributes; slot++)
                    singleton.m_items[slot] = -1;
                singleton.m_items[attr] = val;
                setOfItemSets.addElement(singleton);
            }
        }
    }
    return setOfItemSets;
}

From source file:LabeledItemSet.java

License:Open Source License

/**
 * Generates rules out of item sets.
 *
 * @param minConfidence the minimum confidence
 * @param noPrune flag indicating whether the rules are pruned according to the minimum confidence value
 * @return a set of rules as three parallel vectors: premises, consequences, confidences
 */
public final FastVector[] generateRules(double minConfidence, boolean noPrune) {

    FastVector premises = new FastVector(), consequences = new FastVector(), conf = new FastVector();
    FastVector[] rules = new FastVector[3];
    ItemSet premise, consequence;

    // Generate the single rule with the class label as the consequence.
    premise = new ItemSet(m_totalTransactions);
    consequence = new ItemSet(m_totalTransactions);
    int[] premiseItems = new int[m_items.length];
    int[] consequenceItems = new int[1];
    System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
    consequence.setItem(consequenceItems);
    premise.setItem(premiseItems);
    consequence.setItemAt(m_classLabel, 0);
    consequence.setCounter(this.m_ruleSupCounter);
    premise.setCounter(this.m_counter);
    premises.addElement(premise);
    consequences.addElement(consequence);
    // confidence = rule support / premise support
    // Double.valueOf replaces the deprecated new Double(...) constructor
    conf.addElement(Double.valueOf((double) this.m_ruleSupCounter / (double) this.m_counter));

    rules[0] = premises;
    rules[1] = consequences;
    rules[2] = conf;
    if (!noPrune)
        pruneRules(rules, minConfidence);

    return rules;
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Builds the classifiers.
 *
 * @param insts the training data.
 * @throws Exception if a classifier can't be built
 */
public void buildClassifier(Instances insts) throws Exception {

    Instances newInsts;

    // can classifier handle the data?
    getCapabilities().testWithFail(insts);

    // remove instances with missing class (copy first so the caller's
    // data is left untouched)
    insts = new Instances(insts);
    insts.deleteWithMissingClass();

    if (m_Classifier == null) {
        throw new Exception("No base classifier has been set!");
    }
    // ZeroR baseline, built on the full training data
    m_ZeroR = new ZeroR();
    m_ZeroR.buildClassifier(insts);

    m_TwoClassDataset = null;

    int numClassifiers = insts.numClasses();
    if (numClassifiers <= 2) {

        // binary (or single-class) problem: the base classifier handles it directly
        m_Classifiers = Classifier.makeCopies(m_Classifier, 1);
        m_Classifiers[0].buildClassifier(insts);

        m_ClassFilters = null;

    } else if (m_Method == METHOD_1_AGAINST_1) {
        // generate fastvector of pairs: every unordered pair (i, j) of classes
        FastVector pairs = new FastVector();
        for (int i = 0; i < insts.numClasses(); i++) {
            for (int j = 0; j < insts.numClasses(); j++) {
                if (j <= i)
                    continue;
                int[] pair = new int[2];
                pair[0] = i;
                pair[1] = j;
                pairs.addElement(pair);
            }
        }

        numClassifiers = pairs.size();
        m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers);
        m_ClassFilters = new Filter[numClassifiers];
        m_SumOfWeights = new double[numClassifiers];

        // generate the classifiers: one per pair, trained only on the
        // instances belonging to that pair's two classes
        for (int i = 0; i < numClassifiers; i++) {
            RemoveWithValues classFilter = new RemoveWithValues();
            classFilter.setAttributeIndex("" + (insts.classIndex() + 1));
            classFilter.setModifyHeader(true);
            classFilter.setInvertSelection(true);
            classFilter.setNominalIndicesArr((int[]) pairs.elementAt(i));
            Instances tempInstances = new Instances(insts, 0);
            // class index is cleared so the filter may modify the class attribute
            tempInstances.setClassIndex(-1);
            classFilter.setInputFormat(tempInstances);
            newInsts = Filter.useFilter(insts, classFilter);
            if (newInsts.numInstances() > 0) {
                newInsts.setClassIndex(insts.classIndex());
                m_Classifiers[i].buildClassifier(newInsts);
                m_ClassFilters[i] = classFilter;
                m_SumOfWeights[i] = newInsts.sumOfWeights();
            } else {
                // no training data for this pair: mark the slot unusable
                m_Classifiers[i] = null;
                m_ClassFilters[i] = null;
            }
        }

        // construct a two-class header version of the dataset, used at
        // prediction time to feed instances to the pairwise classifiers
        m_TwoClassDataset = new Instances(insts, 0);
        int classIndex = m_TwoClassDataset.classIndex();
        m_TwoClassDataset.setClassIndex(-1);
        m_TwoClassDataset.deleteAttributeAt(classIndex);
        FastVector classLabels = new FastVector();
        classLabels.addElement("class0");
        classLabels.addElement("class1");
        m_TwoClassDataset.insertAttributeAt(new Attribute("class", classLabels), classIndex);
        m_TwoClassDataset.setClassIndex(classIndex);

    } else {
        // use error correcting code style methods (1-vs-all is the standard code)
        Code code = null;
        switch (m_Method) {
        case METHOD_ERROR_EXHAUSTIVE:
            code = new ExhaustiveCode(numClassifiers);
            break;
        case METHOD_ERROR_RANDOM:
            code = new RandomCode(numClassifiers, (int) (numClassifiers * m_RandomWidthFactor), insts);
            break;
        case METHOD_1_AGAINST_ALL:
            code = new StandardCode(numClassifiers);
            break;
        default:
            throw new Exception("Unrecognized correction code type");
        }
        // one classifier per code bit; each sees a binary indicator class
        numClassifiers = code.size();
        m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers);
        m_ClassFilters = new MakeIndicator[numClassifiers];
        for (int i = 0; i < m_Classifiers.length; i++) {
            m_ClassFilters[i] = new MakeIndicator();
            MakeIndicator classFilter = (MakeIndicator) m_ClassFilters[i];
            classFilter.setAttributeIndex("" + (insts.classIndex() + 1));
            classFilter.setValueIndices(code.getIndices(i));
            classFilter.setNumeric(false);
            classFilter.setInputFormat(insts);
            newInsts = Filter.useFilter(insts, m_ClassFilters[i]);
            m_Classifiers[i].buildClassifier(newInsts);
        }
    }
    m_ClassAttribute = insts.classAttribute();
}

From source file:algoritmogeneticocluster.NewClass.java

/**
 * Loads "tabela10.arff", runs 10-fold cross-validation with several
 * classifiers, and prints each classifier's overall accuracy.
 *
 * @param args unused
 * @throws Exception if loading or classification fails
 */
public static void main(String[] args) throws Exception {
    BufferedReader datafile = readDataFile("tabela10.arff");

    Instances data;
    try {
        data = new Instances(datafile);
    } finally {
        // release the reader once the data is loaded (original never closed it)
        datafile.close();
    }
    data.setClassIndex(data.numAttributes() - 1);

    // Do 10-split cross validation
    Instances[][] split = crossValidationSplit(data, 10);

    // Separate split into training and testing arrays
    Instances[] trainingSplits = split[0];
    Instances[] testingSplits = split[1];

    // Use a set of classifiers
    Classifier[] models = { new SMO(), new J48(), // a decision tree
            new PART(), new DecisionTable(), //decision table majority classifier
            new DecisionStump() //one-level decision tree

    };

    // Run for each model
    for (int j = 0; j < models.length; j++) {

        // Collect every group of predictions for current model in a FastVector
        FastVector predictions = new FastVector();

        // For each training-testing split pair, train and test the classifier
        for (int i = 0; i < trainingSplits.length; i++) {
            Evaluation validation = classify(models[j], trainingSplits[i], testingSplits[i]);

            predictions.appendElements(validation.predictions());
        }

        // Calculate overall accuracy of current classifier on all splits
        double accuracy = calculateAccuracy(predictions);

        // Print current classifier's name and accuracy
        System.out.println("Accuracy of " + models[j].getClass().getSimpleName() + ": "
                + String.format("%.2f%%", accuracy) + "\n---------------------------------");
    }

}