List of usage examples for weka.core FastVector elementAt
public final E elementAt(int index)
From source file:TextDirectoryLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set./* w w w .j av a 2s. c om*/ * * @return the structure of the data set as an empty set of Instances * @throws IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (getDirectory() == null) throw new IOException("No directory/source has been specified"); String directoryPath = getDirectory().getAbsolutePath(); FastVector classes = new FastVector(); Enumeration enm = getStructure().classAttribute().enumerateValues(); while (enm.hasMoreElements()) classes.addElement(enm.nextElement()); Instances data = getStructure(); int fileCount = 0; for (int k = 0; k < classes.size(); k++) { String subdirPath = (String) classes.elementAt(k); File subdir = new File(directoryPath + File.separator + subdirPath); String[] files = subdir.list(); for (int j = 0; j < files.length; j++) { try { fileCount++; if (getDebug()) System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]); double[] newInst = null; if (m_OutputFilename) newInst = new double[3]; else newInst = new double[2]; File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]); BufferedReader is; if (m_charSet == null || m_charSet.length() == 0) { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt))); } else { is = new BufferedReader(new InputStreamReader(new FileInputStream(txt), m_charSet)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); if (m_OutputFilename) newInst[1] = (double) data.attribute(1) .addStringValue(subdirPath + File.separator + files[j]); newInst[data.classIndex()] = (double) k; data.add(new Instance(1.0, newInst)); is.close(); } catch (Exception e) { 
System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath + File.separator + files[j]); } } } return data; }
From source file:ArrayLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set.//from w ww .jav a 2 s . c o m * * @return the structure of the data set as an empty set of Instances * @exception IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (m_data == null) { throw new IOException("No source has been specified"); } if (m_structure == null) { getStructure(); } m_cumulativeStructure = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { m_cumulativeStructure.addElement(new Hashtable()); } m_cumulativeInstances = new FastVector(); FastVector current; for (int i = 0; i < m_data.length; i++) { current = getInstance(m_data[i]); m_cumulativeInstances.addElement(current); } FastVector atts = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { String attname = m_structure.attribute(i).name(); Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i)); if (tempHash.size() == 0) { atts.addElement(new Attribute(attname)); } else { if (m_StringAttributes.isInRange(i)) { atts.addElement(new Attribute(attname, (FastVector) null)); } else { FastVector values = new FastVector(tempHash.size()); // add dummy objects in order to make the FastVector's size == capacity for (int z = 0; z < tempHash.size(); z++) { values.addElement("dummy"); } Enumeration e = tempHash.keys(); while (e.hasMoreElements()) { Object ob = e.nextElement(); // if (ob instanceof Double) { int index = ((Integer) tempHash.get(ob)).intValue(); String s = ob.toString(); if (s.startsWith("'") || s.startsWith("\"")) s = s.substring(1, s.length() - 1); values.setElementAt(new String(s), index); // } } atts.addElement(new Attribute(attname, values)); } } } // make the instances String relationName; relationName = "ArrayData"; Instances dataSet = new 
Instances(relationName, atts, m_cumulativeInstances.size()); for (int i = 0; i < m_cumulativeInstances.size(); i++) { current = ((FastVector) m_cumulativeInstances.elementAt(i)); double[] vals = new double[dataSet.numAttributes()]; for (int j = 0; j < current.size(); j++) { Object cval = current.elementAt(j); if (cval instanceof String) { if (((String) cval).compareTo(m_MissingValue) == 0) { vals[j] = Instance.missingValue(); } else { if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue((String) cval); } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else { throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!"); } } } else if (dataSet.attribute(j).isNominal()) { // find correct index Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j); int index = ((Integer) lookup.get(cval)).intValue(); vals[j] = index; } else if (dataSet.attribute(j).isString()) { vals[j] = dataSet.attribute(j).addStringValue("" + cval); } else { vals[j] = ((Double) cval).doubleValue(); } } dataSet.add(new Instance(1.0, vals)); } m_structure = new Instances(dataSet, 0); m_cumulativeStructure = null; // conserve memory return dataSet; }
From source file:ArrayLoader.java
License:Open Source License
/** * Checks the current instance against what is known about the structure * of the data set so far. If there is a nominal value for an attribute * that was beleived to be numeric then all previously seen values for this * attribute are stored in a Hashtable.//from w w w . j a v a2 s . c o m * * @param current a <code>FastVector</code> value * @exception Exception if an error occurs * * <pre><jml> * private_normal_behavior * requires: current != null; * also * private_exceptional_behavior * requires: current == null * || (* unrecognized object type in current *); * signals: (Exception); * </jml></pre> */ private void checkStructure(FastVector current) throws Exception { if (current == null) { throw new Exception("current shouldn't be null in checkStructure"); } // initialize ranges, if necessary if (m_FirstCheck) { m_NominalAttributes.setUpper(current.size() - 1); m_StringAttributes.setUpper(current.size() - 1); m_FirstCheck = false; } for (int i = 0; i < current.size(); i++) { Object ob = current.elementAt(i); if ((ob instanceof String) || (m_NominalAttributes.isInRange(i)) || (m_StringAttributes.isInRange(i))) { if (ob.toString().compareTo(m_MissingValue) == 0) { // do nothing } else { Hashtable tempHash = (Hashtable) m_cumulativeStructure.elementAt(i); if (!tempHash.containsKey(ob)) { // may have found a nominal value in what was previously thought to // be a numeric variable. 
if (tempHash.size() == 0) { for (int j = 0; j < m_cumulativeInstances.size(); j++) { FastVector tempUpdate = ((FastVector) m_cumulativeInstances.elementAt(j)); Object tempO = tempUpdate.elementAt(i); if (tempO instanceof String) { // must have been a missing value } else { if (!tempHash.containsKey(tempO)) { tempHash.put(new Double(((Double) tempO).doubleValue()), new Integer(tempHash.size())); } } } } int newIndex = tempHash.size(); tempHash.put(ob, new Integer(newIndex)); } } } else if (ob instanceof Double) { Hashtable tempHash = (Hashtable) m_cumulativeStructure.elementAt(i); if (tempHash.size() != 0) { if (!tempHash.containsKey(ob)) { int newIndex = tempHash.size(); tempHash.put(new Double(((Double) ob).doubleValue()), new Integer(newIndex)); } } } else { throw new Exception("Wrong object type in checkStructure!"); } } }
From source file:CopiaSeg3.java
public static double calculateAccuracy(FastVector predictions) { double correct = 0; for (int i = 0; i < predictions.size(); i++) { NominalPrediction np = (NominalPrediction) predictions.elementAt(i); if (np.predicted() == np.actual()) { correct++;// ww w . j ava2s.c o m } } return 100 * correct / predictions.size(); }
From source file:CopiaSeg3.java
public static void main(String[] args) throws Exception { BufferedReader datafile = readDataFile("breast-cancer-wisconsin.arff"); Instances data = new Instances(datafile); data.setClassIndex(data.numAttributes() - 1); // Elije el nmero de particiones para la valicacin (4 = 75% Train, 25% Test) Instances[] split = split(data, 4);// ww w . ja v a2s .co m // Separa los conjuntos en los arrays trainning y testing Instances trainingSplits = split[0]; Instances testingSplits = split[1]; // Elegir un conjunto de clasificadores Classifier[] models = { new MultilayerPerceptron() //, new J48 //, ... }; FastVector fvWekaAttributes = new FastVector(9); // Ejecutar cada clasificador for (int j = 0; j < models.length; j++) { // Collect every group of predictions for current model in a FastVector FastVector predictions = new FastVector(); // For each training-testing split pair, train and test the classifier Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits); predictions.appendElements(validation.predictions()); // Uncomment to see the summary for each training-testing pair. System.out.println(models[j].toString()); // Calculate overall accuracy of current classifier on all splits double accuracy = calculateAccuracy(predictions); // // Print current classifier's name and accuracy in a complicated, but nice-looking way. System.out.println(models[j].getClass().getSimpleName() + " Accuracy: " + String.format("%.2f%%", accuracy) + "\n====================="); // // // Step 4: use the classifier // // For real world applications, the actual use of the classifier is the ultimate goal. Heres the simplest way to achieve that. 
Lets say weve built an instance (named iUse) as explained in step 2: // // Specify that the instance belong to the training set // // in order to inherit from the set description Instance iUse = new DenseInstance(9); iUse.setValue((Attribute) predictions.elementAt(0), 4); iUse.setValue((Attribute) predictions.elementAt(1), 8); iUse.setValue((Attribute) predictions.elementAt(2), 8); iUse.setValue((Attribute) predictions.elementAt(3), 5); iUse.setValue((Attribute) predictions.elementAt(4), 4); iUse.setValue((Attribute) predictions.elementAt(5), 5); iUse.setValue((Attribute) predictions.elementAt(6), 10); iUse.setValue((Attribute) predictions.elementAt(7), 4); iUse.setValue((Attribute) predictions.elementAt(8), 1); iUse.setDataset(trainingSplits); // // // Get the likelihood of each classes // fDistribution[0] is the probability of being positive? // fDistribution[1] is the probability of being negative? double[] fDistribution = models[j].distributionForInstance(iUse); System.out.println("Probabilidad positivo: " + fDistribution[0]); System.out.println("Probabilidad negativo: " + fDistribution[1]); } }
From source file:LabeledItemSet.java
License:Open Source License
/**
 * Filters a set of item sets by rule support: only item sets whose rule
 * support counter lies within [minSupport, maxSupport] are kept.
 *
 * @param itemSets the set of item sets to be pruned
 * @param minSupport the minimum number of transactions to be covered
 * @param maxSupport the maximum support
 * @return a new vector holding the retained item sets, in their original order
 */
public static FastVector deleteItemSets(FastVector itemSets, int minSupport, int maxSupport) {
  int total = itemSets.size();
  FastVector kept = new FastVector(total);

  for (int idx = 0; idx < total; idx++) {
    LabeledItemSet candidate = (LabeledItemSet) itemSets.elementAt(idx);
    boolean tooRare = candidate.m_ruleSupCounter < minSupport;
    boolean tooFrequent = candidate.m_ruleSupCounter > maxSupport;
    if (tooRare || tooFrequent) {
      continue;
    }
    kept.addElement(candidate);
  }

  return kept;
}
From source file:LabeledItemSet.java
License:Open Source License
/** * Return a hashtable filled with the given item sets. * * @param itemSets the set of item sets to be used for filling the hash table * @param initialSize the initial size of the hashtable * @return the generated hashtable/* w w w .j a v a 2 s . co m*/ */ public static Hashtable getHashtable(FastVector itemSets, int initialSize) { Hashtable hashtable = new Hashtable(initialSize); for (int i = 0; i < itemSets.size(); i++) { LabeledItemSet current = (LabeledItemSet) itemSets.elementAt(i); hashtable.put(current, new Integer(current.m_classLabel)); } return hashtable; }
From source file:LabeledItemSet.java
License:Open Source License
/** * Merges all item sets in the set of (k-1)-item sets * to create the (k)-item sets and updates the counters. * @return the generated (k)-item sets/*from w w w .j a va 2 s .c o m*/ * @param totalTrans the total number of transactions * @param itemSets the set of (k-1)-item sets * @param size the value of (k-1) */ public static FastVector mergeAllItemSets(FastVector itemSets, int size, int totalTrans) { FastVector newVector = new FastVector(); LabeledItemSet result; int numFound, k; for (int i = 0; i < itemSets.size(); i++) { LabeledItemSet first = (LabeledItemSet) itemSets.elementAt(i); out: for (int j = i + 1; j < itemSets.size(); j++) { LabeledItemSet second = (LabeledItemSet) itemSets.elementAt(j); while (first.m_classLabel != second.m_classLabel) { j++; if (j == itemSets.size()) break out; second = (LabeledItemSet) itemSets.elementAt(j); } result = new LabeledItemSet(totalTrans, first.m_classLabel); result.m_items = new int[first.m_items.length]; // Find and copy common prefix of size 'size' numFound = 0; k = 0; while (numFound < size) { if (first.m_items[k] == second.m_items[k]) { if (first.m_items[k] != -1) numFound++; result.m_items[k] = first.m_items[k]; } else break out; k++; } // Check difference while (k < first.m_items.length) { if ((first.m_items[k] != -1) && (second.m_items[k] != -1)) break; else { if (first.m_items[k] != -1) result.m_items[k] = first.m_items[k]; else result.m_items[k] = second.m_items[k]; } k++; } if (k == first.m_items.length) { result.m_ruleSupCounter = 0; result.m_counter = 0; newVector.addElement(result); } } } return newVector; }
From source file:LabeledItemSet.java
License:Open Source License
/**
 * Prunes a set of (k)-item sets using the given (k-1)-item sets. An item set
 * is kept only if, for each of its items, the item set with that item blanked
 * out is a key of <code>kMinusOne</code> whose stored label equals the item
 * set's own class label.
 *
 * @param toPrune the set of (k)-item sets to be pruned
 * @param kMinusOne the (k-1)-item sets to be used for pruning
 * @return the pruned set of item sets
 */
public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne) {
  FastVector survivors = new FastVector(toPrune.size());

  for (int idx = 0; idx < toPrune.size(); idx++) {
    LabeledItemSet candidate = (LabeledItemSet) toPrune.elementAt(idx);
    int[] items = candidate.m_items;

    boolean allSubsetsKnown = true;
    for (int pos = 0; allSubsetsKnown && pos < items.length; pos++) {
      if (items[pos] == -1) {
        continue;
      }
      // temporarily blank the item so the candidate itself acts as the
      // lookup key for its (k-1)-subset, then restore it
      int saved = items[pos];
      items[pos] = -1;
      Object storedLabel = kMinusOne.get(candidate);
      items[pos] = saved;

      allSubsetsKnown = storedLabel != null
          && candidate.m_classLabel == ((Integer) storedLabel).intValue();
    }

    if (allSubsetsKnown) {
      survivors.addElement(candidate);
    }
  }

  return survivors;
}
From source file:MultiClassClassifier.java
License:Open Source License
/** * Builds the classifiers.//from www . j a v a 2s . co m * * @param insts the training data. * @throws Exception if a classifier can't be built */ public void buildClassifier(Instances insts) throws Exception { Instances newInsts; // can classifier handle the data? getCapabilities().testWithFail(insts); // remove instances with missing class insts = new Instances(insts); insts.deleteWithMissingClass(); if (m_Classifier == null) { throw new Exception("No base classifier has been set!"); } m_ZeroR = new ZeroR(); m_ZeroR.buildClassifier(insts); m_TwoClassDataset = null; int numClassifiers = insts.numClasses(); if (numClassifiers <= 2) { m_Classifiers = Classifier.makeCopies(m_Classifier, 1); m_Classifiers[0].buildClassifier(insts); m_ClassFilters = null; } else if (m_Method == METHOD_1_AGAINST_1) { // generate fastvector of pairs FastVector pairs = new FastVector(); for (int i = 0; i < insts.numClasses(); i++) { for (int j = 0; j < insts.numClasses(); j++) { if (j <= i) continue; int[] pair = new int[2]; pair[0] = i; pair[1] = j; pairs.addElement(pair); } } numClassifiers = pairs.size(); m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers); m_ClassFilters = new Filter[numClassifiers]; m_SumOfWeights = new double[numClassifiers]; // generate the classifiers for (int i = 0; i < numClassifiers; i++) { RemoveWithValues classFilter = new RemoveWithValues(); classFilter.setAttributeIndex("" + (insts.classIndex() + 1)); classFilter.setModifyHeader(true); classFilter.setInvertSelection(true); classFilter.setNominalIndicesArr((int[]) pairs.elementAt(i)); Instances tempInstances = new Instances(insts, 0); tempInstances.setClassIndex(-1); classFilter.setInputFormat(tempInstances); newInsts = Filter.useFilter(insts, classFilter); if (newInsts.numInstances() > 0) { newInsts.setClassIndex(insts.classIndex()); m_Classifiers[i].buildClassifier(newInsts); m_ClassFilters[i] = classFilter; m_SumOfWeights[i] = newInsts.sumOfWeights(); } else { m_Classifiers[i] = null; 
m_ClassFilters[i] = null; } } // construct a two-class header version of the dataset m_TwoClassDataset = new Instances(insts, 0); int classIndex = m_TwoClassDataset.classIndex(); m_TwoClassDataset.setClassIndex(-1); m_TwoClassDataset.deleteAttributeAt(classIndex); FastVector classLabels = new FastVector(); classLabels.addElement("class0"); classLabels.addElement("class1"); m_TwoClassDataset.insertAttributeAt(new Attribute("class", classLabels), classIndex); m_TwoClassDataset.setClassIndex(classIndex); } else { // use error correcting code style methods Code code = null; switch (m_Method) { case METHOD_ERROR_EXHAUSTIVE: code = new ExhaustiveCode(numClassifiers); break; case METHOD_ERROR_RANDOM: code = new RandomCode(numClassifiers, (int) (numClassifiers * m_RandomWidthFactor), insts); break; case METHOD_1_AGAINST_ALL: code = new StandardCode(numClassifiers); break; default: throw new Exception("Unrecognized correction code type"); } numClassifiers = code.size(); m_Classifiers = Classifier.makeCopies(m_Classifier, numClassifiers); m_ClassFilters = new MakeIndicator[numClassifiers]; for (int i = 0; i < m_Classifiers.length; i++) { m_ClassFilters[i] = new MakeIndicator(); MakeIndicator classFilter = (MakeIndicator) m_ClassFilters[i]; classFilter.setAttributeIndex("" + (insts.classIndex() + 1)); classFilter.setValueIndices(code.getIndices(i)); classFilter.setNumeric(false); classFilter.setInputFormat(insts); newInsts = Filter.useFilter(insts, m_ClassFilters[i]); m_Classifiers[i].buildClassifier(newInsts); } } m_ClassAttribute = insts.classAttribute(); }