List of usage examples for the weka.core.FastVector constructor
public FastVector(int capacity)
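FastVector is Weka's pre-3.7 growable array (later deprecated in favour of java.util.ArrayList); the int argument only pre-sizes the backing array. A minimal standalone sketch of the constructor, assuming the Weka 3.6-era API:

import weka.core.FastVector;

public class FastVectorCapacityDemo {
    public static void main(String[] args) {
        // The capacity argument pre-allocates the backing array;
        // the vector itself starts empty.
        FastVector values = new FastVector(3);
        System.out.println(values.size());       // 0
        values.addElement("low");
        values.addElement("medium");
        values.addElement("high");
        System.out.println(values.size());       // 3
        System.out.println(values.elementAt(1)); // medium
    }
}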
From source file: ArrayLoader.java
License: Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure, then this method should do so before
 * processing the rest of the data set.
 *
 * @return the full data set as a set of Instances
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }
    if (m_structure == null) {
        getStructure();
    }
    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }
    m_cumulativeInstances = new FastVector();
    FastVector current;
    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);
        m_cumulativeInstances.addElement(current);
    }
    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(s, index);
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }
    // make the instances
    String relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());
    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find the correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find the correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }
    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory
    return dataSet;
}
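The "dummy" loop above is a common FastVector idiom: the constructor only reserves capacity, and (in the 3.6 implementation) setElementAt writes straight into the backing array without touching the size counter, so the vector must first be grown with addElement before values can be placed at specific indices. A minimal sketch of the idiom in isolation:

import weka.core.FastVector;

public class DummyFillDemo {
    public static void main(String[] args) {
        FastVector values = new FastVector(2); // capacity 2, but size() == 0
        // Grow the vector so size() == capacity before using setElementAt.
        values.addElement("dummy");
        values.addElement("dummy");
        values.setElementAt("first", 0);
        values.setElementAt("second", 1);
        System.out.println(values.size());     // 2
        System.out.println(values.elementAt(0) + ", " + values.elementAt(1));
    }
}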
From source file: CopiaSeg3.java
public static void main(String[] args) throws Exception {
    BufferedReader datafile = readDataFile("breast-cancer-wisconsin.arff");
    Instances data = new Instances(datafile);
    data.setClassIndex(data.numAttributes() - 1);
    // Choose the number of partitions for validation (4 = 75% train, 25% test)
    Instances[] split = split(data, 4);
    // Separate the sets into the training and testing arrays
    Instances trainingSplits = split[0];
    Instances testingSplits = split[1];
    // Choose a set of classifiers
    Classifier[] models = { new MultilayerPerceptron()
            //, new J48()
            //, ...
    };
    // Run each classifier
    for (int j = 0; j < models.length; j++) {
        // Collect every group of predictions for the current model in a FastVector
        FastVector predictions = new FastVector();
        // For each training-testing split pair, train and test the classifier
        Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits);
        predictions.appendElements(validation.predictions());
        // Print the summary for each training-testing pair
        System.out.println(models[j].toString());
        // Calculate the overall accuracy of the current classifier on all splits
        double accuracy = calculateAccuracy(predictions);
        // Print the current classifier's name and accuracy
        System.out.println(models[j].getClass().getSimpleName() + " Accuracy: "
                + String.format("%.2f%%", accuracy) + "\n=====================");
        // Step 4: use the classifier.
        // For real-world applications, actually using the classifier is the
        // ultimate goal. The simplest way: build an instance (named iUse) and
        // attach it to the training set so that it inherits the set's
        // attribute descriptions.
        Instance iUse = new DenseInstance(trainingSplits.numAttributes());
        iUse.setDataset(trainingSplits);
        iUse.setValue(trainingSplits.attribute(0), 4);
        iUse.setValue(trainingSplits.attribute(1), 8);
        iUse.setValue(trainingSplits.attribute(2), 8);
        iUse.setValue(trainingSplits.attribute(3), 5);
        iUse.setValue(trainingSplits.attribute(4), 4);
        iUse.setValue(trainingSplits.attribute(5), 5);
        iUse.setValue(trainingSplits.attribute(6), 10);
        iUse.setValue(trainingSplits.attribute(7), 4);
        iUse.setValue(trainingSplits.attribute(8), 1);
        // Get the likelihood of each class:
        // fDistribution[0] is the probability of the first class value,
        // fDistribution[1] the probability of the second.
        double[] fDistribution = models[j].distributionForInstance(iUse);
        System.out.println("Probability positive: " + fDistribution[0]);
        System.out.println("Probability negative: " + fDistribution[1]);
    }
}
From source file: LabeledItemSet.java
License: Open Source License
/**
 * Deletes all item sets that don't have minimum support or that exceed maximum support.
 *
 * @param itemSets the set of item sets to be pruned
 * @param minSupport the minimum number of transactions to be covered
 * @param maxSupport the maximum support
 * @return the reduced set of item sets
 */
public static FastVector deleteItemSets(FastVector itemSets, int minSupport, int maxSupport) {
    FastVector newVector = new FastVector(itemSets.size());
    for (int i = 0; i < itemSets.size(); i++) {
        LabeledItemSet current = (LabeledItemSet) itemSets.elementAt(i);
        if ((current.m_ruleSupCounter >= minSupport) && (current.m_ruleSupCounter <= maxSupport))
            newVector.addElement(current);
    }
    return newVector;
}
From source file: LabeledItemSet.java
License: Open Source License
/**
 * Prunes a set of (k)-item sets using the given (k-1)-item sets.
 *
 * @param toPrune the set of (k)-item sets to be pruned
 * @param kMinusOne the (k-1)-item sets to be used for pruning
 * @return the pruned set of item sets
 */
public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) {
    FastVector newVector = new FastVector(toPrune.size());
    int help, j;
    for (int i = 0; i < toPrune.size(); i++) {
        LabeledItemSet current = (LabeledItemSet) toPrune.elementAt(i);
        for (j = 0; j < current.m_items.length; j++) {
            if (current.m_items[j] != -1) {
                // temporarily remove item j to obtain a (k-1)-subset
                help = current.m_items[j];
                current.m_items[j] = -1;
                if (kMinusOne.get(current) != null
                        && (current.m_classLabel == (((Integer) kMinusOne.get(current)).intValue())))
                    current.m_items[j] = help;
                else {
                    // the (k-1)-subset is not frequent, so the (k)-item set is pruned
                    current.m_items[j] = help;
                    break;
                }
            }
        }
        if (j == current.m_items.length)
            newVector.addElement(current);
    }
    return newVector;
}
From source file: TextDirectoryToArff.java
License: Open Source License
public Instances createDataset(String directoryPath) throws Exception {
    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("contents", (FastVector) null));
    Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);
    File dir = new File(directoryPath);
    String[] files = dir.list();
    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".txt")) {
            try {
                double[] newInst = new double[2];
                newInst[0] = (double) data.attribute(0).addStringValue(files[i]);
                File txt = new File(directoryPath + File.separator + files[i]);
                InputStreamReader is = new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }
                is.close();
                newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString());
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
            }
        }
    }
    return data;
}
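The (FastVector) null cast in both addElement calls selects the Attribute(String, FastVector) constructor overload and, by passing null instead of a list of nominal values, declares a string attribute. A minimal standalone sketch of that idiom, assuming the Weka 3.6-era API:

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class StringAttributeDemo {
    public static void main(String[] args) {
        FastVector atts = new FastVector(1);
        // A null FastVector selects the string-attribute form of the
        // Attribute(String, FastVector) constructor.
        atts.addElement(new Attribute("contents", (FastVector) null));
        Instances data = new Instances("demo", atts, 0);
        double[] vals = new double[1];
        // addStringValue interns the string and returns its index,
        // which is what the instance actually stores.
        vals[0] = data.attribute(0).addStringValue("hello world");
        data.add(new Instance(1.0, vals));
        System.out.println(data);
    }
}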
From source file: arffGenerator.TextDirectoryToArff.java
License: Open Source License
@Override
public Instances createDatasetSupervised(String filePath) throws Exception {
    File dir = new File(filePath);
    File fileAux;
    String[] files = dir.list();
    FastVector classValues = new FastVector(files.length);
    for (int i = 0; i < files.length; i++) {
        classValues.addElement(files[i]);
    }
    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("contents", (FastVector) null));
    atts.addElement(new Attribute("class", classValues));
    Instances data = new Instances("text_files_in_" + filePath, atts, 0);
    for (int i = 0; i < files.length; i++) {
        fileAux = new File(filePath + "/" + files[i]);
        if (fileAux.isDirectory()) {
            cargarAtrribDeClase(files[i], filePath + File.separator + files[i], data);
        }
    }
    return data;
}
From source file: arffGenerator.TextDirectoryToArff.java
License: Open Source License
@Override
public Instances createDatasetUnsupervised(String filePath) throws Exception {
    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("contents", (FastVector) null));
    FastVector classValues = new FastVector(1);
    classValues.addElement("");
    atts.addElement(new Attribute("class", classValues));
    Instances data = new Instances("text_files_in_" + filePath, atts, 0);
    cargarAtrribDeClase(null, filePath, data);
    return data;
}
From source file: at.ac.tuwien.ifs.myluceneanalyzers.fa.algorithm.PersianDictionaryCountCompoundWord.java
/**
 * Creates a new {@link PersianDictionaryCountCompoundWord}. Unlike
 * {@link DictionaryCompoundWordTokenFilter}, it considers onlyLongestMatch to be
 * true and will only return subwords of maximal size. <br/>
 * Example: "moonlight" will be returned as "moonlight" only if it is in the
 * dictionary (not as "moonlight, light", as DictionaryCompoundWordTokenFilter
 * with onlyLongestMatch=true would).
 *
 * @param dictionary the word dictionary to match against
 * @param mapWordCount the word counts used for the "harmmean" feature
 * @param minWordSize only words longer than this get processed
 * @param minSubwordSize only subwords longer than this get to the output stream
 * @param maxSubwordSize only subwords shorter than this get to the output stream
 * @throws Exception
 */
public PersianDictionaryCountCompoundWord(CharArraySet dictionary, Map<String, Double> mapWordCount,
        int minWordSize, int minSubwordSize, int maxSubwordSize) throws Exception {
    if (dictionary == null) {
        throw new IllegalArgumentException("dictionary cannot be null");
    }
    this.dictionary = dictionary;
    this.mapWordCount = mapWordCount;
    this.minWordSize = minWordSize;
    this.minSubwordSize = minSubwordSize;
    this.maxSubwordSize = maxSubwordSize;
    // Create the numeric feature attribute
    attributeHermmean = new Attribute("harmmean");
    // Declare the class attribute along with its values
    FastVector fvClassVal = new FastVector(2);
    fvClassVal.addElement("1");
    fvClassVal.addElement("0");
    Attribute classAttribute = new Attribute("iscorrect", fvClassVal);
    // Build the dataset header with capacity for one instance
    FastVector fvWekaAttributes = new FastVector(2);
    fvWekaAttributes.addElement(attributeHermmean);
    fvWekaAttributes.addElement(classAttribute);
    instances = new Instances("Test relation", fvWekaAttributes, 1);
    instances.setClassIndex(1);
    cls = (Classifier) weka.core.SerializationHelper.read("content/adtree.model");
}
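A hypothetical sketch of how the single-row header built above might be used at classification time. The method name isCorrectCompound is an assumption; instances, attributeHermmean, and cls are the fields initialised in the constructor:

// Hypothetical companion method (name is an assumption, not from the source).
private boolean isCorrectCompound(double harmonicMean) throws Exception {
    Instance inst = new Instance(2);     // one feature plus the class, both missing
    inst.setDataset(instances);          // attach the header so attributes resolve
    inst.setValue(attributeHermmean, harmonicMean);
    double label = cls.classifyInstance(inst);
    // fvClassVal added "1" first, so class index 0.0 means "is correct"
    return label == 0.0;
}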
From source file: at.aictopic1.sentimentanalysis.machinelearning.impl.BasicClassifierNeutral.java
/**
 * Create test instances.
 */
@Override
protected void newTestInstances() {
    // Declare a string attribute to hold the document text
    Attribute attribute1 = new Attribute("Document", (FastVector) null);
    // Declare the class attribute along with its values
    FastVector fvNominalVal = new FastVector(3);
    fvNominalVal.addElement("neg");
    fvNominalVal.addElement("neut");
    fvNominalVal.addElement("pos");
    Attribute classAttribute = new Attribute("WEKAclass", fvNominalVal);
    this.setTestSet(attribute1, classAttribute);
}
From source file: at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java
public Integer classify(Tweet[] tweets) {
    // TEST: generate some example tweets and override the argument
    Tweet exOne = new Tweet("This is good and fantastic");
    exOne.setPreprocessedText("This is good and fantastic");
    Tweet exTwo = new Tweet("Horribly, terribly bad and more");
    exTwo.setPreprocessedText("Horribly, terribly bad and more");
    Tweet exThree = new Tweet(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    exThree.setPreprocessedText(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    Tweet exFour = new Tweet("bad hate worst sick");
    exFour.setPreprocessedText("bad hate worst sick");
    tweets = new Tweet[] { exOne, exTwo, exThree, exFour };
    // END TEST

    // Load model
    // loadModel();

    // Hand-built attributes (left unused: the attribute structure is copied
    // from the training data, dataStructure, below)
    Attribute twitter_id = new Attribute("twitter_id");
    FastVector classVal = new FastVector(2);
    classVal.addElement("pos");
    classVal.addElement("neg");
    Attribute class_attr = new Attribute("class_attr", classVal);

    // Copy the attribute structure of the training data into a new FastVector
    FastVector attrVector = new FastVector(3);
    Enumeration structAttrs = dataStructure.enumerateAttributes();
    while (structAttrs.hasMoreElements()) {
        attrVector.addElement((Attribute) structAttrs.nextElement());
    }
    // Get the number of tweets, then create the prediction set
    int numTweets = tweets.length;
    Instances predictSet = new Instances("predictInstances", attrVector, numTweets);
    predictSet.setClassIndex(2);
    // init prediction
    double prediction = -1;
    System.out.println("PredictSet matches source structure: " + predictSet.equalHeaders(dataStructure));
    System.out.println("PredSet struct: " + predictSet.attribute(0));
    System.out.println("PredSet struct: " + predictSet.attribute(1));
    System.out.println("PredSet struct: " + predictSet.attribute(2));
    for (int i = 0; i < numTweets; i++) {
        String content = (String) tweets[i].getPreprocessedText();
        System.out.println("Tweet content: " + content);
        Instance tweetInstance = new Instance(predictSet.numAttributes());
        tweetInstance.setDataset(predictSet);
        tweetInstance.setValue(predictSet.attribute(0), i);
        tweetInstance.setValue(predictSet.attribute(1), content);
        tweetInstance.setClassMissing();
        predictSet.add(tweetInstance);
        try {
            // Apply the string filter
            StringToWordVector filter = new StringToWordVector();
            filter.setInputFormat(predictSet);
            Instances filteredPredictSet = Filter.useFilter(predictSet, filter);
            // Apply the model
            prediction = trainedModel.classifyInstance(filteredPredictSet.instance(i));
            filteredPredictSet.instance(i).setClassValue(prediction);
            System.out.println("Classification: " + filteredPredictSet.instance(i).toString());
            System.out.println("Prediction: " + prediction);
        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return 0;
}
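One caveat in the snippet above: the StringToWordVector filter is re-fitted on the prediction set alone, so its vocabulary need not match the one the model was trained with. A common alternative is to wrap the base learner in a FilteredClassifier so that train-time and predict-time filtering stay consistent. A minimal sketch, assuming the training data is available as an Instances object named trainData (the NaiveBayes base learner is an assumption, not from the source):

import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.meta.FilteredClassifier;
import weka.core.Instances;
import weka.filters.unsupervised.attribute.StringToWordVector;

public class TrainWithConsistentFilter {
    public static Classifier train(Instances trainData) throws Exception {
        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(new StringToWordVector()); // fitted once, on the training data
        fc.setClassifier(new NaiveBayes());     // any base classifier would do
        fc.buildClassifier(trainData);
        return fc; // classifyInstance() now applies the same word vector internally
    }
}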