Example usage for weka.core FastVector FastVector

List of usage examples for weka.core FastVector FastVector

Introduction

On this page you can find an example usage of the weka.core FastVector constructor FastVector(int).

Prototype

public FastVector(int capacity) 

Source Link

Document

Constructs a vector with the given capacity.

Usage

From source file:ArrayLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }

    // Determine the header lazily on first use.
    if (m_structure == null) {
        getStructure();
    }

    // One Hashtable per attribute: maps each distinct value observed in the
    // data to its nominal index (filled in by getInstance below).
    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }

    // First pass: parse every row of m_data into a FastVector of cell values.
    m_cumulativeInstances = new FastVector();
    FastVector current;

    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);

        m_cumulativeInstances.addElement(current);
    }

    // Build the final attribute list from the accumulated value tables.
    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            // No distinct values recorded -> numeric attribute.
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                // Declared as a string attribute (null FastVector = string type).
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    //     if (ob instanceof Double) {
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    // Strip one level of surrounding single or double quotes.
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(new String(s), index);
                    //     }
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }

    // make the instances
    String relationName;
    relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    // Second pass: convert each parsed row into a weka Instance.
    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        // NOTE(review): "position" reports the instance index (i + 1);
                        // the attribute index here is j — confirm which was intended.
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }
    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory

    return dataSet;
}

From source file:CopiaSeg3.java

/**
 * Trains each configured classifier on a 75/25 split of the
 * breast-cancer-wisconsin data, prints its accuracy, and classifies one
 * hand-built example instance, printing its class distribution.
 *
 * <p>BUG FIX: the original cast elements of {@code predictions} (which
 * holds weka Prediction objects) to {@code Attribute} when building the
 * query instance — a guaranteed ClassCastException. The attributes must
 * come from the dataset itself.
 *
 * @param args unused
 * @throws Exception if reading the ARFF file or classification fails
 */
public static void main(String[] args) throws Exception {

    BufferedReader datafile = readDataFile("breast-cancer-wisconsin.arff");

    Instances data = new Instances(datafile);
    data.setClassIndex(data.numAttributes() - 1);

    // Choose the number of partitions for validation (4 = 75% train, 25% test).
    Instances[] split = split(data, 4);

    // Separate the sets into training and testing arrays.
    Instances trainingSplits = split[0];
    Instances testingSplits = split[1];

    // Choose a set of classifiers.
    Classifier[] models = { new MultilayerPerceptron()
            //, new J48
            //, ...
    };

    // Hard-coded feature values for the single example instance below.
    double[] exampleValues = { 4, 8, 8, 5, 4, 5, 10, 4, 1 };

    // Run each classifier.
    for (int j = 0; j < models.length; j++) {

        // Collect every group of predictions for current model in a FastVector.
        FastVector predictions = new FastVector();

        // For each training-testing split pair, train and test the classifier.
        Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits);
        predictions.appendElements(validation.predictions());

        // Summary for the training-testing pair.
        System.out.println(models[j].toString());

        // Calculate overall accuracy of current classifier on all splits.
        double accuracy = calculateAccuracy(predictions);

        System.out.println(models[j].getClass().getSimpleName() + " Accuracy: "
                + String.format("%.2f%%", accuracy) + "\n=====================");

        // Step 4: use the classifier on a hand-built instance. The attribute
        // definitions come from the training set, not from the predictions.
        Instance iUse = new DenseInstance(exampleValues.length);
        for (int k = 0; k < exampleValues.length; k++) {
            iUse.setValue(trainingSplits.attribute(k), exampleValues[k]);
        }

        // Associate the instance with the training set so it inherits the
        // dataset description (attribute definitions, class index).
        iUse.setDataset(trainingSplits);

        // Get the likelihood of each class:
        // fDistribution[0] is the probability of the first class value,
        // fDistribution[1] of the second.
        double[] fDistribution = models[j].distributionForInstance(iUse);

        System.out.println("Probabilidad positivo: " + fDistribution[0]);
        System.out.println("Probabilidad negativo: " + fDistribution[1]);
    }

}

From source file:LabeledItemSet.java

License:Open Source License

/**
 * Deletes all item sets that don't have minimum support and have more than maximum support
 * @return the reduced set of item sets/*from   ww w  .j  a  v  a2  s  .c  o  m*/
 * @param maxSupport the maximum support
 * @param itemSets the set of item sets to be pruned
 * @param minSupport the minimum number of transactions to be covered
 */
public static FastVector deleteItemSets(FastVector itemSets, int minSupport, int maxSupport) {

    FastVector newVector = new FastVector(itemSets.size());

    for (int i = 0; i < itemSets.size(); i++) {
        LabeledItemSet current = (LabeledItemSet) itemSets.elementAt(i);
        if ((current.m_ruleSupCounter >= minSupport) && (current.m_ruleSupCounter <= maxSupport))
            newVector.addElement(current);
    }
    return newVector;
}

From source file:LabeledItemSet.java

License:Open Source License

/**
* Prunes a set of (k)-item sets using the given (k-1)-item sets.
*
* @param toPrune the set of (k)-item sets to be pruned
* @param kMinusOne the (k-1)-item sets to be used for pruning
* @return the pruned set of item sets
*/
public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) {

    FastVector newVector = new FastVector(toPrune.size());
    int help, j;

    for (int i = 0; i < toPrune.size(); i++) {
        LabeledItemSet current = (LabeledItemSet) toPrune.elementAt(i);

        // For each contained item, temporarily blank it out (-1) and look the
        // resulting (k-1)-subset up in kMinusOne. The item set is kept only if
        // every such subset exists with a matching class label.
        for (j = 0; j < current.m_items.length; j++) {
            if (current.m_items[j] != -1) {
                help = current.m_items[j];
                current.m_items[j] = -1;
                // NOTE(review): 'current' is mutated while used as a Hashtable
                // key — this relies on LabeledItemSet's hashCode/equals
                // reflecting m_items; verify against LabeledItemSet.
                if (kMinusOne.get(current) != null
                        && (current.m_classLabel == (((Integer) kMinusOne.get(current)).intValue())))
                    current.m_items[j] = help;
                else {
                    // Subset missing or label mismatch: restore the item and
                    // stop checking — this item set is pruned.
                    current.m_items[j] = help;
                    break;
                }
            }
        }
        // Inner loop ran to completion => all (k-1)-subsets found; keep it.
        if (j == current.m_items.length)
            newVector.addElement(current);
    }
    return newVector;
}

From source file:TextDirectoryToArff.java

License:Open Source License

/**
 * Builds a two-attribute (filename, contents) string dataset from every
 * ".txt" file directly inside the given directory.
 *
 * <p>Fixes over the original: the reader is now closed (it leaked), a
 * failed file conversion is reported instead of silently swallowed, and a
 * non-listable directory raises an error instead of an NPE.
 *
 * @param directoryPath directory to scan for .txt files
 * @return the dataset, one instance per successfully read text file
 * @throws Exception if the directory cannot be listed
 */
public Instances createDataset(String directoryPath) throws Exception {

    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("contents", (FastVector) null));
    Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);

    File dir = new File(directoryPath);
    String[] files = dir.list();
    if (files == null) {
        // File.list() returns null when the path is not a readable directory.
        throw new IllegalArgumentException("Cannot list directory: " + directoryPath);
    }
    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".txt")) {
            try {
                double[] newInst = new double[2];
                newInst[0] = (double) data.attribute(0).addStringValue(files[i]);
                File txt = new File(directoryPath + File.separator + files[i]);
                InputStreamReader is = new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                try {
                    int c;
                    while ((c = is.read()) != -1) {
                        txtStr.append((char) c);
                    }
                } finally {
                    is.close(); // was leaked in the original version
                }
                newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString());
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                // Best-effort: skip unreadable files, but report the failure
                // instead of silently swallowing it.
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
            }
        }
    }
    return data;
}

From source file:arffGenerator.TextDirectoryToArff.java

License:Open Source License

/**
 * Builds a supervised text dataset: a string "contents" attribute plus a
 * nominal "class" attribute whose values are the entry names under
 * {@code filePath}; each subdirectory is then loaded with its name as the
 * class label.
 *
 * <p>Fix over the original: {@code dir.list()} returning null (path not a
 * readable directory) now raises an error instead of an NPE.
 *
 * @param filePath root directory; each subdirectory is one class
 * @return the populated dataset
 * @throws Exception if loading the directory contents fails
 */
@Override
public Instances createDatasetSupervised(String filePath) throws Exception {

    File dir = new File(filePath);
    File fileAux;
    String[] files = dir.list();
    if (files == null) {
        // File.list() returns null when filePath is not a readable directory.
        throw new IllegalArgumentException("Cannot list directory: " + filePath);
    }
    // Every entry name becomes a class value. Note: plain files also end up
    // in the nominal domain even though only directories are loaded below;
    // that original behavior is preserved.
    FastVector classValues = new FastVector(files.length);
    for (int i = 0; i < files.length; i++) {
        classValues.addElement(files[i]);
    }
    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("contents", (FastVector) null));
    atts.addElement(new Attribute("class", classValues));
    Instances data = new Instances("text_files_in_" + filePath, atts, 0);
    // Load each subdirectory; its name is the class label.
    for (int i = 0; i < files.length; i++) {
        fileAux = new File(filePath + "/" + files[i]);
        if (fileAux.isDirectory()) {
            cargarAtrribDeClase(files[i], filePath + File.separator + files[i], data);
        }
    }
    return data;
}

From source file:arffGenerator.TextDirectoryToArff.java

License:Open Source License

/**
 * Builds an unsupervised text dataset: a string "contents" attribute plus
 * a placeholder nominal "class" attribute with a single empty value, then
 * loads every file under {@code filePath} into it.
 *
 * @param filePath directory whose files are loaded
 * @return the populated dataset
 * @throws Exception if loading the directory contents fails
 */
@Override
public Instances createDatasetUnsupervised(String filePath) throws Exception {
    FastVector attributes = new FastVector(2);
    attributes.addElement(new Attribute("contents", (FastVector) null));

    // Single-valued placeholder class (no real labels in unsupervised mode).
    FastVector placeholderClass = new FastVector(1);
    placeholderClass.addElement("");
    attributes.addElement(new Attribute("class", placeholderClass));

    Instances dataset = new Instances("text_files_in_" + filePath, attributes, 0);
    cargarAtrribDeClase(null, filePath, dataset);
    return dataset;
}

From source file:at.ac.tuwien.ifs.myluceneanalyzers.fa.algorithm.PersianDictionaryCountCompoundWord.java

/**
 * Creates a new {@link PersianDictionaryCountCompoundWord}. Unlike
 * {@link DictionaryCompoundWordTokenFilter} it considers onlyLongestMatch
 * to be true and it will only return subwords of maximal size. <br/>
 * Example: "moonlight" will be returned as "moonlight" only if it is in the
 * dictionary (not as "moonlight, light" as the
 * DictionaryCompoundWordTokenFilter with onlyLongestMatch=true would).
 *
 * @param dictionary
 *          the word dictionary to match against; must not be null
 * @param mapWordCount
 *          per-word counts consulted by this filter
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @throws Exception if the serialized classifier model cannot be read
 */
public PersianDictionaryCountCompoundWord(CharArraySet dictionary, Map<String, Double> mapWordCount,
        int minWordSize, int minSubwordSize, int maxSubwordSize) throws Exception {
    if (dictionary == null) {
        throw new IllegalArgumentException("dictionary cannot be null");
    }
    this.dictionary = dictionary;
    this.mapWordCount = mapWordCount;
    this.minWordSize = minWordSize;
    this.minSubwordSize = minSubwordSize;
    this.maxSubwordSize = maxSubwordSize;

    // Create the attributes
    attributeHermmean = new Attribute("harmmean");

    // Declare the class attribute along with its values ("1" / "0").
    FastVector fvClassVal = new FastVector(2);
    fvClassVal.addElement("1");
    fvClassVal.addElement("0");
    Attribute classAttribute = new Attribute("iscorrect", fvClassVal);

    // Header for single-instance classification requests (capacity 1).
    FastVector fvWekaAttributes = new FastVector(2);
    fvWekaAttributes.addElement(attributeHermmean);
    fvWekaAttributes.addElement(classAttribute);

    instances = new Instances("Test relation", fvWekaAttributes, 1);
    instances.setClassIndex(1);

    // Load the serialized classifier (file name suggests an ADTree model).
    cls = (Classifier) weka.core.SerializationHelper.read("content/adtree.model");

}

From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.BasicClassifierNeutral.java

/**
 * Creates the test-instance attributes: a string "Document" attribute and
 * a three-valued nominal class attribute (neg / neut / pos), then hands
 * them to {@code setTestSet}.
 */
@Override
protected void newTestInstances() {
    // String attribute holding the raw document text
    // (a null FastVector marks a string-typed attribute).
    Attribute documentAttribute = new Attribute("Document", (FastVector) null);

    // Nominal class attribute with the three sentiment labels.
    FastVector sentimentLabels = new FastVector(3);
    sentimentLabels.addElement("neg");
    sentimentLabels.addElement("neut");
    sentimentLabels.addElement("pos");
    Attribute classAttribute = new Attribute("WEKAclass", sentimentLabels);

    this.setTestSet(documentAttribute, classAttribute);
}

From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java

/**
 * Classifies each tweet's preprocessed text with the trained model and
 * prints the per-instance prediction.
 *
 * <p>BUG FIX: the original overwrote the {@code tweets} parameter with
 * four hard-coded example tweets (leftover debug code marked "// TEST"),
 * so the caller's input was never classified. That override is removed.
 * Unused locals (twitter_id, classVal, class_attr) are also dropped.
 *
 * @param tweets tweets to classify (their preprocessed text is used)
 * @return always 0, kept for interface compatibility; predictions are
 *         printed, not returned
 */
public Integer classify(Tweet[] tweets) {
    // Copy the attribute definitions from the source structure so the
    // prediction header is compatible with the trained model.
    FastVector attrVector = new FastVector(3);

    int numTweets = tweets.length;
    Enumeration structAttrs = dataStructure.enumerateAttributes();
    while (structAttrs.hasMoreElements()) {
        attrVector.addElement((Attribute) structAttrs.nextElement());
    }
    Instances predictSet = new Instances("predictInstances", attrVector, numTweets);
    predictSet.setClassIndex(2);

    // init prediction
    double prediction = -1;

    System.out.println("PredictSet matches source structure: " + predictSet.equalHeaders(dataStructure));

    System.out.println("PredSet struct: " + predictSet.attribute(0));
    System.out.println("PredSet struct: " + predictSet.attribute(1));
    System.out.println("PredSet struct: " + predictSet.attribute(2));

    for (int i = 0; i < numTweets; i++) {
        String content = (String) tweets[i].getPreprocessedText();

        System.out.println("Tweet content: " + content);

        // Attribute 0: running id; attribute 1: tweet text; class left missing
        // so the model can fill it in.
        Instance tweetInstance = new Instance(predictSet.numAttributes());
        tweetInstance.setDataset(predictSet);
        tweetInstance.setValue(predictSet.attribute(0), i);
        tweetInstance.setValue(predictSet.attribute(1), content);
        tweetInstance.setClassMissing();

        predictSet.add(tweetInstance);

        try {
            // Convert the string attribute into the word-vector representation
            // the trained model expects.
            StringToWordVector filter = new StringToWordVector();
            filter.setInputFormat(predictSet);
            Instances filteredPredictSet = Filter.useFilter(predictSet, filter);

            // Apply model.
            prediction = trainedModel.classifyInstance(filteredPredictSet.instance(i));
            filteredPredictSet.instance(i).setClassValue(prediction);
            System.out.println("Classification: " + filteredPredictSet.instance(i).toString());
            System.out.println("Prediction: " + prediction);

        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return 0;
}