List of usage examples for weka.core Instance stringValue
public String stringValue(Attribute att);
From source file:assign00.KNNClassifier.java
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) { double distance = 0; for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2); } else {//from w ww . j ava2s.com if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } else { distance += 1; } } } return distance; }
From source file:assign00.KNNClassifier.java
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) { double distance = 0; for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) { if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) { distance += abs(instanceLHS.value(i) - instanceRHS.value(i)); } else {// w w w . jav a 2 s.co m if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) { distance += 0; } else { distance += 1; } } } return distance; }
From source file:CEP.GenerateStream.java
/**
 * Reads the w4ndata ARFF file incrementally and replays each instance as a
 * CEP event, pacing the replay via {@code WaitTime} using the timestamp
 * stored in attribute 0.
 *
 * BUG FIX: the original handler compared exceptions with
 * {@code e.equals(new FileNotFoundException())}. {@code Throwable} does not
 * override {@code equals}, so that comparison is identity-based and always
 * false — the specific error messages were unreachable. Typed catch clauses
 * restore the intended dispatch.
 */
public void MakeStream() {
    // Default path; switch to the work-machine path when running under that account.
    File file = new File("C:\\Users\\Weary\\Documents\\w4ndata\\w4ndata.arff");
    String pc = System.getProperty("user.dir").toString();
    if (pc.contains("gs023850")) {
        file = new File("C:\\Users\\gs023850\\Documents\\w4ndata\\w4ndata.arff");
    }
    try {
        ArffLoader loader = new ArffLoader();
        loader.setFile(file);
        Instances structure = loader.getStructure();
        HeaderManager.SetStructure(new Instances(structure));
        Instance current;
        long previousTimeStamp = 0;
        String timeStamp = "0";
        long wait = 0;
        // Incremental read: one instance at a time, sent as it is loaded.
        while ((current = loader.getNextInstance(structure)) != null) {
            // Attribute 0 is assumed to hold the event timestamp — TODO confirm schema.
            timeStamp = current.stringValue(0);
            cepRT.sendEvent(current);
            System.out.println("Sending event");
            previousTimeStamp = WaitTime(timeStamp, previousTimeStamp, wait);
        }
    } catch (FileNotFoundException e) {
        System.out.println("File not found - could not generate stream");
    } catch (IOException e) {
        System.out.println("Unable to read file");
    } catch (NumberFormatException e) {
        System.out.println("Unable to convert to time to number - bad time");
    } catch (Exception e) {
        System.out.println(e.toString());
    }
}
From source file:classif.Prototyper.java
License:Open Source License
@Override public void buildClassifier(Instances data) throws Exception { trainingData = data;//from w ww . j ava 2s . com Attribute classAttribute = data.classAttribute(); prototypes = new ArrayList<>(); classedData = new HashMap<String, ArrayList<Sequence>>(); indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>()); indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>()); } sequences = new Sequence[data.numInstances()]; classMap = new String[sequences.length]; for (int i = 0; i < sequences.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } sequences[i] = new Sequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(sequences[i]); indexClassedDataInFullData.get(clas).add(i); // System.out.println("Element "+i+" of train is classed "+clas+" and went to element "+(indexClassedDataInFullData.get(clas).size()-1)); } buildSpecificClassifier(data); if (fillPrototypes) addMissingPrototypesRandom(); }
From source file:classif.Prototyper.java
License:Open Source License
public static ClassedSequence[] convertWekaSetToClassedSequence(Instances test) { Attribute classAttribute = test.classAttribute(); ClassedSequence[] testSequences = new ClassedSequence[test.numInstances()]; for (int i = 0; i < testSequences.length; i++) { Instance sample = test.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); }//from www . j a v a 2s.co m String clas = sample.stringValue(classAttribute); testSequences[i] = new ClassedSequence(new Sequence(sequence), clas); } return testSequences; }
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/**
 * Determines the word dictionary from the input training data.
 *
 * When {@code forcedAttributes} is null, the dictionary is built from the
 * text: tokens are counted per class (unless per-class operation is
 * disabled), periodically pruned, frequency-thresholded, and turned into
 * one output attribute per surviving word. When {@code forcedAttributes}
 * is set, the dictionary and output format are taken directly from that
 * attribute list instead (entries from index 2 on; class index fixed at 1).
 */
private void determineDictionary() {
    if (forcedAttributes == null) {
        // Initialize stopwords, loading the user-supplied stoplist file if any.
        Stopwords stopwords = new Stopwords();
        if (getUseStoplist()) {
            try {
                if (getStopwords().exists() && !getStopwords().isDirectory())
                    stopwords.read(getStopwords());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        // Operate on a per-class basis if a class attribute is set:
        // one dictionary per class value, otherwise a single dictionary.
        int classInd = getInputFormat().classIndex();
        int values = 1;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            values = getInputFormat().attribute(classInd).numValues();
        }
        TreeMap[] dictionaryArr = new TreeMap[values];
        for (int i = 0; i < values; i++) {
            dictionaryArr[i] = new TreeMap();
        }
        // Make sure we know which fields to convert.
        determineSelectedRange();
        // Tokenize all training text, counting term and document frequencies.
        // pruneRate: every this-many instances, drop words seen only once so far.
        long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
            Instance instance = getInputFormat().instance(i);
            // vInd selects which per-class dictionary this instance feeds.
            int vInd = 0;
            if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
                vInd = (int) instance.classValue();
            }
            // h collects the distinct words of THIS instance (for docCount).
            Hashtable h = new Hashtable();
            for (int j = 0; j < instance.numAttributes(); j++) {
                if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
                    // Tokenize the string attribute, then stem / lower-case /
                    // stoplist-filter each token as configured.
                    m_Tokenizer.tokenize(instance.stringValue(j));
                    while (m_Tokenizer.hasMoreElements()) {
                        String word = ((String) m_Tokenizer.nextElement()).intern();
                        if (this.m_lowerCaseTokens == true)
                            word = word.toLowerCase();
                        word = m_Stemmer.stem(word);
                        if (this.m_useStoplist == true)
                            if (stopwords.is(word))
                                continue;
                        if (!(h.contains(word)))
                            h.put(word, new Integer(0));
                        // Bump the term count in the per-class dictionary.
                        Count count = (Count) dictionaryArr[vInd].get(word);
                        if (count == null) {
                            dictionaryArr[vInd].put(word, new Count(1));
                        } else {
                            count.count++;
                        }
                    }
                }
            }
            // Update docCount for every word that occurred in this instance (document).
            Enumeration e = h.keys();
            while (e.hasMoreElements()) {
                String word = (String) e.nextElement();
                Count c = (Count) dictionaryArr[vInd].get(word);
                if (c != null) {
                    c.docCount++;
                } else
                    System.err.println("Warning: A word should definitely be in the " + "dictionary.Please check the code");
            }
            // Periodic pruning: discard words with count <= 1 to bound memory.
            if (pruneRate > 0) {
                if (i % pruneRate == 0 && i > 0) {
                    for (int z = 0; z < values; z++) {
                        Vector d = new Vector(1000);
                        Iterator it = dictionaryArr[z].keySet().iterator();
                        while (it.hasNext()) {
                            String word = (String) it.next();
                            Count count = (Count) dictionaryArr[z].get(word);
                            if (count.count <= 1) {
                                d.add(word);
                            }
                        }
                        Iterator iter = d.iterator();
                        while (iter.hasNext()) {
                            String word = (String) iter.next();
                            dictionaryArr[z].remove(word);
                        }
                    }
                }
            }
        }
        // Figure out the minimum required word frequency per class so that at
        // most ~m_WordsToKeep words survive (never below m_minTermFreq).
        int totalsize = 0;
        int prune[] = new int[values];
        for (int z = 0; z < values; z++) {
            totalsize += dictionaryArr[z].size();
            int array[] = new int[dictionaryArr[z].size()];
            int pos = 0;
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                array[pos] = count.count;
                pos++;
            }
            // Sort the counts so the keep-threshold can be read off by rank.
            sortArray(array);
            if (array.length < m_WordsToKeep) {
                // Not enough words: fall back to the minimum term frequency.
                prune[z] = m_minTermFreq;
            } else {
                // Otherwise the threshold is at least minFreq.
                prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
            }
        }
        // Convert the dictionary into an attribute index, one attribute per word.
        FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());
        // Copy over the non-converted attributes first, remembering where the
        // class attribute lands in the output format.
        int classIndex = -1;
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (!m_SelectedRange.isInRange(i)) {
                if (getInputFormat().classIndex() == i) {
                    classIndex = attributes.size();
                }
                attributes.addElement(getInputFormat().attribute(i).copy());
            }
        }
        // Add the word-vector attributes, eliminating duplicates that occur in
        // multiple per-class dictionaries; each word gets a stable output index.
        TreeMap newDictionary = new TreeMap();
        int index = attributes.size();
        for (int z = 0; z < values; z++) {
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                if (count.count >= prune[z]) {
                    if (newDictionary.get(word) == null) {
                        newDictionary.put(word, new Integer(index++));
                        attributes.addElement(new Attribute(m_Prefix + word));
                    }
                }
            }
        }
        // Compute total document frequencies per kept word (summed over classes).
        m_DocsCounts = new int[attributes.size()];
        Iterator it = newDictionary.keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            int idx = ((Integer) newDictionary.get(word)).intValue();
            int docsCount = 0;
            for (int j = 0; j < values; j++) {
                Count c = (Count) dictionaryArr[j].get(word);
                if (c != null)
                    docsCount += c.docCount;
            }
            m_DocsCounts[idx] = docsCount;
        }
        // Trim and publish: dictionary, instance count, and the output format.
        attributes.trimToSize();
        m_Dictionary = newDictionary;
        m_NumInstances = getInputFormat().numInstances();
        Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        setOutputFormat(outputFormat);
    } else {
        // Forced-attribute mode: adopt the supplied attribute list wholesale.
        determineSelectedRange();
        m_NumInstances = getInputFormat().numInstances();
        // Dictionary entries start at index 2; indices 0 and 1 are assumed to be
        // reserved (class index is forced to 1 below) — TODO confirm with caller.
        TreeMap newDictionary = new TreeMap();
        for (int i = 2; i < forcedAttributes.size(); i++) {
            newDictionary.put(((Attribute) forcedAttributes.get(i)).name(), new Integer(i));
        }
        m_Dictionary = newDictionary;
        Instances outputFormat = new Instances(getInputFormat().relationName(), forcedAttributes, 0);
        outputFormat.setClassIndex(1);
        setOutputFormat(outputFormat);
    }
}
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/**
 * Converts a single instance to its word-vector form, without document-length
 * normalization, and appends the resulting sparse instance to {@code v}.
 *
 * Non-selected attributes are copied through first; selected (text)
 * attributes are tokenized and mapped through the dictionary, with optional
 * TF and IDF transforms applied to the word entries only.
 *
 * @param instance the instance to convert
 * @param v receives the converted {@code SparseInstance}
 * @return the number of directly-copied (non-word) attributes; word
 *         attributes start at this output index
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Sorted map: output attribute index -> value, used to build the sparse instance.
    TreeMap contained = new TreeMap();
    // Copy all non-converted attributes from input to output.
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                // (zeros are implicit in a sparse instance).
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Utils.missingValue()));
                } else {
                    // For a string attribute, first register the value in the
                    // output attribute's value range, then store its new index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // The first string value in a SparseInstance doesn't get
                        // printed, so a dummy value is inserted at position 0.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }
    // Tokenize the selected text attributes and accumulate word counts
    // (or 0/1 presence flags when m_OutputCounts is off).
    for (int j = 0; j < instance.numAttributes(); j++) {
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                // Words not in the dictionary are silently dropped.
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate if here rather than two lines down
                        // to avoid a hashtable lookup.
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }
    // TF transform: value -> log(value + 1), word attributes only
    // (indices >= firstCopy).
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }
    // IDF transform: value -> value * log(numDocs / docFreq), word attributes only.
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }
    // Convert the sorted map into the parallel arrays a SparseInstance needs.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }
    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
From source file:classify.Classifier.java
public static void missingValuesRows(Instances data) { int[] missingValues = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { missingValues[i] = 0;/*from w ww .j av a 2 s . co m*/ } Instance example; String value = ""; //get number of missing attributes per row int missValues = 0; for (int i = 0; i < data.numInstances(); i++) { example = data.instance(i); for (int j = 0; j < 15; j++) { if (example.attribute(j).isNominal()) { value = example.stringValue(j); } else if (example.attribute(j).isNumeric()) { value = Double.toString(example.value(j)); } if (value.equals("?") || value.equals("NaN")) { missingValues[i]++; missValues++; } } } System.out.println("Number of Missing Values: " + missValues); //get how many times i attributes are missing int[] frequency = new int[15]; for (int i = 0; i < data.numInstances(); i++) { frequency[missingValues[i]]++; } int numRows = 0; for (int i = 0; i < data.numInstances(); i++) { if (missingValues[i] > 0) { numRows++; } } System.out.println("Number of rows with missing values: " + numRows); System.out.println("Number of missing attributes per row:"); for (int i = 0; i < 15; i++) { System.out.println(i + ": " + frequency[i]); } }
From source file:classify.Classifier.java
public static void setAttributeValues(Instances data) { Instance example; String[][] savedData = new String[data.numInstances()][10]; for (int i = 0; i < data.numInstances(); i++) { example = data.instance(i);/*from ww w. j ava2 s. c o m*/ savedData[i][0] = example.stringValue(0); savedData[i][1] = example.stringValue(3); savedData[i][2] = example.stringValue(4); savedData[i][3] = example.stringValue(5); savedData[i][4] = example.stringValue(6); savedData[i][5] = example.stringValue(8); savedData[i][6] = example.stringValue(9); savedData[i][7] = example.stringValue(11); savedData[i][8] = example.stringValue(12); savedData[i][9] = example.stringValue(15); } //add in values for discrete attributes //A1 FastVector attVals = new FastVector(); attVals.addElement("b"); attVals.addElement("a"); data.deleteAttributeAt(0); data.insertAttributeAt(new Attribute("A1", attVals), 0); //A4 attVals = new FastVector(); attVals.addElement("u"); attVals.addElement("y"); attVals.addElement("l"); attVals.addElement("t"); data.deleteAttributeAt(3); data.insertAttributeAt(new Attribute("A4", attVals), 3); //A5 attVals = new FastVector(); attVals.addElement("g"); attVals.addElement("p"); attVals.addElement("gg"); data.deleteAttributeAt(4); data.insertAttributeAt(new Attribute("A5", attVals), 4); //A6 attVals = new FastVector(); attVals.addElement("c"); attVals.addElement("d"); attVals.addElement("cc"); attVals.addElement("i"); attVals.addElement("j"); attVals.addElement("k"); attVals.addElement("m"); attVals.addElement("r"); attVals.addElement("q"); attVals.addElement("w"); attVals.addElement("x"); attVals.addElement("e"); attVals.addElement("aa"); attVals.addElement("ff"); data.deleteAttributeAt(5); data.insertAttributeAt(new Attribute("A6", attVals), 5); //A7 attVals = new FastVector(); attVals.addElement("v"); attVals.addElement("h"); attVals.addElement("bb"); attVals.addElement("j"); attVals.addElement("n"); attVals.addElement("z"); attVals.addElement("dd"); attVals.addElement("ff"); 
attVals.addElement("o"); data.deleteAttributeAt(6); data.insertAttributeAt(new Attribute("A7", attVals), 6); //A9 attVals = new FastVector(); attVals.addElement("t"); attVals.addElement("f"); data.deleteAttributeAt(8); data.insertAttributeAt(new Attribute("A9", attVals), 8); //A10 attVals = new FastVector(); attVals.addElement("t"); attVals.addElement("f"); data.deleteAttributeAt(9); data.insertAttributeAt(new Attribute("A10", attVals), 9); //A12 attVals = new FastVector(); attVals.addElement("t"); attVals.addElement("f"); data.deleteAttributeAt(11); data.insertAttributeAt(new Attribute("A12", attVals), 11); //A13 attVals = new FastVector(); attVals.addElement("g"); attVals.addElement("p"); attVals.addElement("s"); data.deleteAttributeAt(12); data.insertAttributeAt(new Attribute("A13", attVals), 12); //Class attVals = new FastVector(); attVals.addElement("+"); attVals.addElement("-"); data.deleteAttributeAt(15); data.insertAttributeAt(new Attribute("C", attVals), 15); for (int i = 0; i < data.numInstances(); i++) { if (!"?".equals(savedData[i][0])) { data.instance(i).setValue(0, savedData[i][0]); } if (!"?".equals(savedData[i][1])) { data.instance(i).setValue(3, savedData[i][1]); } if (!"?".equals(savedData[i][2])) { data.instance(i).setValue(4, savedData[i][2]); } if (!"?".equals(savedData[i][3])) { data.instance(i).setValue(5, savedData[i][3]); } if (!"?".equals(savedData[i][4])) { data.instance(i).setValue(6, savedData[i][4]); } if (!"?".equals(savedData[i][5])) { data.instance(i).setValue(8, savedData[i][5]); } if (!"?".equals(savedData[i][6])) { data.instance(i).setValue(9, savedData[i][6]); } if (!"?".equals(savedData[i][7])) { data.instance(i).setValue(11, savedData[i][7]); } if (!"?".equals(savedData[i][8])) { data.instance(i).setValue(12, savedData[i][8]); } if (!"?".equals(savedData[i][9])) { data.instance(i).setValue(15, savedData[i][9]); } } }
From source file:Clustering.WekaKMeansClustererWrapper.java
public ArrayList<String>[] classify(HashMap<String, List> data, boolean clearData) { ArrayList<String>[] clusterResult; try {/* www .jav a2 s . c om*/ File arff = m_ArffExporter.getArff(data); int nSize = data.size(); if (arff == null) return null; if (clearData) data.clear(); FileInputStream is = new FileInputStream(arff.getAbsolutePath()); Instances instances = ConverterUtils.DataSource.read(is); is.close(); String[] keys = new String[instances.numInstances()]; for (int i = 0; i < instances.numInstances(); ++i) { Instance instance = instances.instance(i); keys[i] = instance.stringValue(0); // assume that the 0th attribute is the key string } instances.deleteStringAttributes(); SimpleKMeans cl = new SimpleKMeans(); int numClusters = m_NumberOfClusters < nSize ? m_NumberOfClusters : nSize; String[] options = new String[5]; options[0] = "-O"; options[1] = "-N"; options[2] = Integer.toString(numClusters); options[3] = "-A"; options[4] = m_DistanceFunction; cl.setOptions(options); //System.out.println( "Clustering" ); cl.buildClusterer(instances); //System.out.println( "Create ArrayList" ); clusterResult = new ArrayList[m_NumberOfClusters]; for (int i = 0; i < m_NumberOfClusters; ++i) { clusterResult[i] = new ArrayList<>(); } //System.out.println( "Assigning" ); int[] assignment = cl.getAssignments(); for (int i = 0; i < assignment.length; ++i) { clusterResult[assignment[i]].add(keys[i]); } //System.out.println( "Done" ); if (!arff.delete()) arff.deleteOnExit(); } catch (Exception ex) { //System.out.println( "[EXCEPTION] " + ex.getMessage() ); m_LastErrorMessage = ex.getMessage(); return null; } return clusterResult; }