List of usage examples for the weka.core.Attribute method addStringValue
public int addStringValue(String value)
From source file:adams.ml.data.WekaConverter.java
License:Open Source License
/** * Turns an ADAMS dataset row into a Weka Instance. * * @param data the dataset to use as template * @param row the row to convert/*from w ww . j ava2 s. c o m*/ * @return the generated instance * @throws Exception if conversion fails */ public static Instance toInstance(Instances data, Row row) throws Exception { Instance result; double[] values; int i; Cell cell; Attribute att; values = new double[data.numAttributes()]; for (i = 0; i < data.numAttributes(); i++) { values[i] = Utils.missingValue(); if (!row.hasCell(i)) continue; cell = row.getCell(i); if (cell.isMissing()) continue; att = data.attribute(i); switch (att.type()) { case Attribute.NUMERIC: values[i] = cell.toDouble(); break; case Attribute.DATE: values[i] = cell.toAnyDateType().getTime(); break; case Attribute.NOMINAL: values[i] = att.indexOfValue(cell.getContent()); break; case Attribute.STRING: values[i] = att.addStringValue(cell.getContent()); break; default: throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(att)); } } result = new DenseInstance(1.0, values); result.setDataset(data); return result; }
From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java
License:Apache License
/** * Method that converts a text message into an instance. * * @param text the message content to convert * @param data the header information/*from ww w .j av a 2s . co m*/ * @return the generated Instance */ private Instance makeInstance(String text, Instances data) { Instance instance = new Instance(2); Attribute messageAtt = data.attribute("content"); instance.setValue(messageAtt, messageAtt.addStringValue(text)); instance.setDataset(data); return instance; }
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/** * FAST HACK REMOVING FUNCTIONALITIES FROM WEKA ORIGINAL METHOD! * /*ww w.ja v a2 s .co m*/ * @param rs * @return * @throws SQLException */ public static Instances retrieveInstanceFromResultSet(ResultSet rs) throws SQLException { ResultSetMetaData md = rs.getMetaData(); // Determine structure of the instances int numAttributes = md.getColumnCount(); int[] attributeTypes = new int[numAttributes]; Hashtable[] nominalIndexes = new Hashtable[numAttributes]; FastVector[] nominalStrings = new FastVector[numAttributes]; for (int i = 1; i <= numAttributes; i++) { attributeTypes[i - 1] = Attribute.NUMERIC; } // For sqlite // cache column names because the last while(rs.next()) { iteration for // the tuples below will close the md object: Vector<String> columnNames = new Vector<String>(); for (int i = 0; i < numAttributes; i++) { columnNames.add(md.getColumnName(i + 1)); } // Step through the tuples FastVector instances = new FastVector(); int rowCount = 0; while (rs.next()) { double[] vals = new double[numAttributes]; for (int i = 1; i <= numAttributes; i++) { int in = rs.getInt(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = in; } Instance newInst = new Instance(1.0, vals); instances.addElement(newInst); rowCount++; } } // disconnectFromDatabase(); (perhaps other queries might be made) // Create the header and add the instances to the dataset FastVector attribInfo = new FastVector(); for (int i = 0; i < numAttributes; i++) { /* Fix for databases that uppercase column names */ // String attribName = attributeCaseFix(md.getColumnName(i + 1)); String attribName = columnNames.get(i); switch (attributeTypes[i]) { case Attribute.NOMINAL: attribInfo.addElement(new Attribute(attribName, nominalStrings[i])); break; case Attribute.NUMERIC: attribInfo.addElement(new Attribute(attribName)); break; case Attribute.STRING: Attribute att = new Attribute(attribName, (FastVector) null); attribInfo.addElement(att); for (int n = 0; n < 
nominalStrings[i].size(); n++) { att.addStringValue((String) nominalStrings[i].elementAt(n)); } break; case Attribute.DATE: attribInfo.addElement(new Attribute(attribName, (String) null)); break; default: throw new SQLException("Unknown attribute type"); } } Instances result = new Instances("QueryResult", attribInfo, instances.size()); for (int i = 0; i < instances.size(); i++) { result.add((Instance) instances.elementAt(i)); } rs.close(); return result; }
From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java
License:Open Source License
/** * Given a (possibly empty) Instances object containing the required weka Attributes, generates a weka Instance for a * single data point.//from ww w . ja va 2 s . co m * * @param instances the weka Instances object containing attributes * @param data_point the data point to convert * @return a weka instance with assigned attributes */ protected static Instance assignWekaAttributes(Instances instances, Word data_point) { double[] instance = new double[instances.numAttributes()]; for (int i = 0; i < instances.numAttributes(); ++i) { Attribute attribute = instances.attribute(i); if (data_point.hasAttribute(attribute.name()) && !data_point.getAttribute(attribute.name()).toString().equals("?")) { switch (attribute.type()) { case Attribute.NOMINAL: int index = attribute.indexOfValue(data_point.getAttribute(attribute.name()).toString()); instance[i] = (double) index; break; case Attribute.NUMERIC: // Check if value is really a number. try { instance[i] = Double.valueOf(data_point.getAttribute(attribute.name()).toString()); } catch (NumberFormatException e) { AuToBIUtils.error("Number expected for feature: " + attribute.name()); } break; case Attribute.STRING: instance[i] = attribute.addStringValue(data_point.getAttribute(attribute.name()).toString()); break; default: AuToBIUtils.error("Unknown attribute type"); } } else { instance[i] = Utils.missingValue(); } } Instance inst = new DenseInstance(1, instance); inst.setDataset(instances); return inst; }
From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java
License:Open Source License
/** * Set the output format if the class is nominal. */// w w w . j av a2s . c om private void setOutputFormat() { FastVector newAtts; Instances outputFormat; newAtts = new FastVector(); BitSet attrSrc = new BitSet(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = null; Attribute srcAtt = getInputFormat().attribute(j); if (!m_Columns.isInRange(j) || srcAtt.indexOfValue(m_ReplVal) >= 0) { att = (Attribute) srcAtt.copy(); } else if (srcAtt.isNominal()) { Enumeration<String> valsEnum = srcAtt.enumerateValues(); ArrayList<String> valsList = new ArrayList<String>(); while (valsEnum.hasMoreElements()) { valsList.add(valsEnum.nextElement()); } valsList.add(m_ReplVal); att = new Attribute(srcAtt.name(), valsList); } else { // string attributes att = (Attribute) srcAtt.copy(); att.addStringValue(m_ReplVal); } newAtts.addElement(att); attrSrc.set(j); } outputFormat = new Instances(getInputFormat().relationName(), newAtts, 0); outputFormat.setClassIndex(getInputFormat().classIndex()); setOutputFormat(outputFormat); m_StringToCopy = new AttributeLocator(getInputFormat(), Attribute.STRING, MathUtils.findTrue(attrSrc)); }
From source file:form.ml.ClassifierTemplate.java
/**
 * Builds a two-slot weka Instance whose "text" attribute carries the given
 * String, using the training data as header.
 *
 * @param text the String to be converted
 * @return Instance object attached to the training dataset
 */
private Instance makeInstance(String text) {
  Attribute textAttribute = train.attribute("text");
  // the value returned by addStringValue is what gets stored in the instance
  double textIndex = textAttribute.addStringValue(text);

  Instance wrapped = new Instance(2);
  wrapped.setValue(textAttribute, textIndex);
  wrapped.setDataset(train);
  return wrapped;
}
From source file:py.fpuna.lib.ExtendedInstanceQuery.java
License:Open Source License
/** * Makes a database query to convert a table into a set of instances * * @param query the query to convert to instances * @return the instances contained in the result of the query, NULL if the * SQL query doesn't return a ResultSet, e.g., DELETE/INSERT/UPDATE * @throws Exception if an error occurs// w w w . ja v a 2 s . co m */ public Instances retrieveInstances(String query) throws Exception { if (m_Debug) System.err.println("Executing query: " + query); connectToDatabase(); if (execute(query) == false) { if (m_PreparedStatement.getUpdateCount() == -1) { throw new Exception("Query didn't produce results"); } else { if (m_Debug) System.err.println(m_PreparedStatement.getUpdateCount() + " rows affected."); close(); return null; } } ResultSet rs = getResultSet(); if (m_Debug) System.err.println("Getting metadata..."); ResultSetMetaData md = rs.getMetaData(); if (m_Debug) System.err.println("Completed getting metadata..."); // Determine structure of the instances int numAttributes = md.getColumnCount(); int[] attributeTypes = new int[numAttributes]; Hashtable[] nominalIndexes = new Hashtable[numAttributes]; FastVector[] nominalStrings = new FastVector[numAttributes]; for (int i = 1; i <= numAttributes; i++) { /* switch (md.getColumnType(i)) { case Types.CHAR: case Types.VARCHAR: case Types.LONGVARCHAR: case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY:*/ switch (translateDBColumnType(md.getColumnTypeName(i))) { case STRING: //System.err.println("String --> nominal"); attributeTypes[i - 1] = Attribute.NOMINAL; nominalIndexes[i - 1] = new Hashtable(); nominalStrings[i - 1] = new FastVector(); break; case TEXT: //System.err.println("Text --> string"); attributeTypes[i - 1] = Attribute.STRING; nominalIndexes[i - 1] = new Hashtable(); nominalStrings[i - 1] = new FastVector(); break; case BOOL: //System.err.println("boolean --> nominal"); attributeTypes[i - 1] = Attribute.NOMINAL; nominalIndexes[i - 1] = new Hashtable(); nominalIndexes[i - 
1].put("false", new Double(0)); nominalIndexes[i - 1].put("true", new Double(1)); nominalStrings[i - 1] = new FastVector(); nominalStrings[i - 1].addElement("false"); nominalStrings[i - 1].addElement("true"); break; case DOUBLE: //System.err.println("BigDecimal --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case BYTE: //System.err.println("byte --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case SHORT: //System.err.println("short --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case INTEGER: //System.err.println("int --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case LONG: //System.err.println("long --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case FLOAT: //System.err.println("float --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case DATE: attributeTypes[i - 1] = Attribute.DATE; break; case TIME: attributeTypes[i - 1] = Attribute.DATE; break; default: //System.err.println("Unknown column type"); attributeTypes[i - 1] = Attribute.STRING; } } // For sqlite // cache column names because the last while(rs.next()) { iteration for // the tuples below will close the md object: Vector<String> columnNames = new Vector<String>(); for (int i = 0; i < numAttributes; i++) { columnNames.add(md.getColumnLabel(i + 1)); } // Step through the tuples if (m_Debug) System.err.println("Creating instances..."); FastVector instances = new FastVector(); int rowCount = 0; while (rs.next()) { if (rowCount % 100 == 0) { if (m_Debug) { System.err.print("read " + rowCount + " instances \r"); System.err.flush(); } } double[] vals = new double[numAttributes]; for (int i = 1; i <= numAttributes; i++) { /*switch (md.getColumnType(i)) { case Types.CHAR: case Types.VARCHAR: case Types.LONGVARCHAR: case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY:*/ switch (translateDBColumnType(md.getColumnTypeName(i))) { case STRING: String str = rs.getString(i); if (rs.wasNull()) { vals[i - 
1] = Instance.missingValue(); } else { Double index = (Double) nominalIndexes[i - 1].get(str); if (index == null) { index = new Double(nominalStrings[i - 1].size()); nominalIndexes[i - 1].put(str, index); nominalStrings[i - 1].addElement(str); } vals[i - 1] = index.doubleValue(); } break; case TEXT: String txt = rs.getString(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { Double index = (Double) nominalIndexes[i - 1].get(txt); if (index == null) { // Need to add one because first value in // string attribute is dummy value. index = new Double(nominalStrings[i - 1].size()) + 1; nominalIndexes[i - 1].put(txt, index); nominalStrings[i - 1].addElement(txt); } vals[i - 1] = index.doubleValue(); } break; case BOOL: boolean boo = rs.getBoolean(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (boo ? 1.0 : 0.0); } break; case DOUBLE: // BigDecimal bd = rs.getBigDecimal(i, 4); double dd = rs.getDouble(i); // Use the column precision instead of 4? if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { // newInst.setValue(i - 1, bd.doubleValue()); vals[i - 1] = dd; } break; case BYTE: byte by = rs.getByte(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) by; } break; case SHORT: short sh = rs.getShort(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) sh; } break; case INTEGER: int in = rs.getInt(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) in; } break; case LONG: long lo = rs.getLong(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) lo; } break; case FLOAT: float fl = rs.getFloat(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) fl; } break; case DATE: Date date = rs.getDate(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { // TODO: Do a value check here. 
vals[i - 1] = (double) date.getTime(); } break; case TIME: Time time = rs.getTime(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { // TODO: Do a value check here. vals[i - 1] = (double) time.getTime(); } break; default: vals[i - 1] = Instance.missingValue(); } } Instance newInst; if (m_CreateSparseData) { newInst = new SparseInstance(1.0, vals); } else { newInst = new Instance(1.0, vals); } instances.addElement(newInst); rowCount++; } //disconnectFromDatabase(); (perhaps other queries might be made) // Create the header and add the instances to the dataset if (m_Debug) System.err.println("Creating header..."); FastVector attribInfo = new FastVector(); for (int i = 0; i < numAttributes; i++) { /* Fix for databases that uppercase column names */ // String attribName = attributeCaseFix(md.getColumnName(i + 1)); String attribName = attributeCaseFix(columnNames.get(i)); switch (attributeTypes[i]) { case Attribute.NOMINAL: attribInfo.addElement(new Attribute(attribName, nominalStrings[i])); break; case Attribute.NUMERIC: attribInfo.addElement(new Attribute(attribName)); break; case Attribute.STRING: Attribute att = new Attribute(attribName, (FastVector) null); attribInfo.addElement(att); for (int n = 0; n < nominalStrings[i].size(); n++) { att.addStringValue((String) nominalStrings[i].elementAt(n)); } break; case Attribute.DATE: attribInfo.addElement(new Attribute(attribName, (String) null)); break; default: throw new Exception("Unknown attribute type"); } } Instances result = new Instances("QueryResult", attribInfo, instances.size()); for (int i = 0; i < instances.size(); i++) { result.add((Instance) instances.elementAt(i)); } close(rs); return result; }
From source file:sg.edu.nus.comp.nlp.ims.io.CWekaLexeltWriter.java
License:Open Source License
@Override public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException { String relation = p_Lexelt.getID(); FastVector attributes = new FastVector(); int capacity = p_Lexelt.size(); IStatistic stat = p_Lexelt.getStatistic(); Attribute ids = new Attribute("#ID"); attributes.addElement(ids);//from w ww . j av a 2 s . c o m int keySize = stat.getKeys().size(); for (int keyIdx = 0; keyIdx < keySize; keyIdx++) { String key = stat.getKey(keyIdx); String type = stat.getType(keyIdx); if (ANumericFeature.class.isAssignableFrom(Class.forName(type))) { attributes.addElement(new Attribute(key)); } else { FastVector attributeValues = new FastVector(); List<String> values = stat.getValue(keyIdx); for (String value : values) { attributeValues.addElement(value); } if (attributeValues.size() == 0) { throw new IllegalStateException("No attribute specified."); } attributes.addElement(new Attribute(key, attributeValues)); } } FastVector attributeValues = new FastVector(); for (String tag : stat.getTags()) { attributeValues.addElement(tag); } attributes.addElement(new Attribute("#TAG", attributeValues)); Instances instances = new Instances(relation, attributes, capacity); for (int instIdx = 0; instIdx < p_Lexelt.size(); instIdx++) { IInstance instance = p_Lexelt.getInstance(instIdx); int keyIdx = 0; double value; IFeature feature; int featureSize = instance.size(); Hashtable<Integer, Double> features = new Hashtable<Integer, Double>(); ArrayList<Integer> exist = new ArrayList<Integer>(); for (int featIdx = 0; featIdx < featureSize; featIdx++) { feature = instance.getFeature(featIdx); keyIdx = stat.getIndex(feature.getKey()); if (keyIdx < 0) { continue; } if (ANumericFeature.class.isInstance(feature)) { value = Double.parseDouble(feature.getValue()); } else if (ABinaryFeature.class.isInstance(feature)) { value = instances.attribute(keyIdx + 1).indexOfValue(feature.getValue()); } else { String fv = feature.getValue(); if (fv == null || !stat.contains(keyIdx, fv)) { fv = 
stat.getDefaultValue(); } value = instances.attribute(keyIdx + 1).indexOfValue(fv); } features.put(keyIdx + 1, value); exist.add(keyIdx + 1); } exist.add(keySize + 1); Collections.sort(exist); double[] attValues = new double[keySize + 2]; ids.addStringValue(instance.getID()); attValues[0] = ids.indexOfValue(instance.getID()); int begin, end = -1; for (int valueIdx = 0; valueIdx < exist.size(); valueIdx++) { begin = end + 1; end = exist.get(valueIdx); for (int i = begin; i < end; i++) { if (instances.attribute(i).isNumeric()) { attValues[i] = 0; } else { attValues[i] = instances.attribute(i).indexOfValue("0"); } } if (end <= keySize) { attValues[end] = features.get(end); } } for (String tag : instance.getTag()) { if (tag.equals("'?'") || tag.equals("?")) { attValues[keySize + 1] = Instance.missingValue(); } else { attValues[keySize + 1] = instances.attribute(keySize + 1).indexOfValue(tag); } Instance ins = new Instance(1, attValues); instances.add(ins); } if (instance.getTag().size() == 0) { attValues[keySize + 1] = Instance.missingValue(); Instance ins = new Instance(1, attValues); instances.add(ins); } } return instances; }
From source file:sg.edu.nus.comp.nlp.ims.io.CWekaSparseLexeltWriter.java
License:Open Source License
/**
 * Converts a lexelt into a sparse weka dataset.
 * <p>
 * The dataset layout is: slot 0 the "#ID" attribute, slots 1..keySize one
 * attribute per feature key of the lexelt statistic, and slot keySize + 1 the
 * nominal "#TAG" attribute. Only the ID, the features present on an instance
 * and the tag are handed to the sparse instance. One weka instance is added
 * per tag of each lexelt instance; an instance without tags is added once
 * with a missing tag.
 *
 * @param p_Lexelt the lexelt to convert
 * @return the generated weka {@code Instances} object
 * @throws ClassNotFoundException if a feature type named by the statistic
 *         cannot be loaded
 * @throws IllegalStateException if a non-numeric feature key has no values
 */
@Override
public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
  String relationName = p_Lexelt.getID();
  FastVector header = new FastVector();
  int initialCapacity = p_Lexelt.size();
  IStatistic statistic = p_Lexelt.getStatistic();

  Attribute ids = new Attribute("#ID");
  header.addElement(ids);

  // one attribute per feature key: numeric, or nominal over the known values
  int keyCount = statistic.getKeys().size();
  for (int k = 0; k < keyCount; k++) {
    String keyName = statistic.getKey(k);
    String typeName = statistic.getType(k);
    if (ANumericFeature.class.isAssignableFrom(Class.forName(typeName))) {
      header.addElement(new Attribute(keyName));
      continue;
    }
    FastVector labels = new FastVector();
    List<String> knownValues = statistic.getValue(k);
    for (String label : knownValues) {
      labels.addElement(label);
    }
    if (labels.size() == 0) {
      throw new IllegalStateException("No attribute specified.");
    }
    header.addElement(new Attribute(keyName, labels));
  }

  FastVector tagLabels = new FastVector();
  for (String tag : statistic.getTags()) {
    tagLabels.addElement(tag);
  }
  header.addElement(new Attribute("#TAG", tagLabels));

  Instances dataset = new Instances(relationName, header, initialCapacity);
  final int tagSlot = keyCount + 1;
  final int totalSlots = keyCount + 2;

  for (int n = 0; n < p_Lexelt.size(); n++) {
    IInstance current = p_Lexelt.getInstance(n);

    // collect the value of every feature whose key the statistic knows
    Hashtable<Integer, Double> valueBySlot = new Hashtable<Integer, Double>();
    ArrayList<Integer> presentSlots = new ArrayList<Integer>();
    int featureCount = current.size();
    for (int f = 0; f < featureCount; f++) {
      IFeature feat = current.getFeature(f);
      int key = statistic.getIndex(feat.getKey());
      if (key < 0) {
        continue;
      }
      // + 1 because slot 0 is occupied by the ID attribute
      int slot = key + 1;
      double encoded;
      if (ANumericFeature.class.isInstance(feat)) {
        encoded = Double.parseDouble(feat.getValue());
      } else if (ABinaryFeature.class.isInstance(feat)) {
        encoded = dataset.attribute(slot).indexOfValue(feat.getValue());
      } else {
        String label = feat.getValue();
        if (label == null || !statistic.contains(key, label)) {
          label = statistic.getDefaultValue();
        }
        encoded = dataset.attribute(slot).indexOfValue(label);
      }
      valueBySlot.put(slot, encoded);
      presentSlots.add(slot);
    }
    Collections.sort(presentSlots);

    // parallel arrays: ID first, then the present features, tag last
    int present = presentSlots.size();
    int last = present + 1;
    double[] sparseValues = new double[present + 2];
    int[] sparseIndices = new int[present + 2];

    ids.addStringValue(current.getID());
    sparseValues[0] = ids.indexOfValue(current.getID());
    sparseIndices[0] = 0;
    for (int p = 0; p < present; p++) {
      int slot = presentSlots.get(p);
      sparseIndices[p + 1] = slot;
      sparseValues[p + 1] = valueBySlot.get(slot);
    }

    Attribute tags = dataset.attribute(tagSlot);
    sparseIndices[last] = tagSlot;
    for (String tag : current.getTag()) {
      boolean unknownTag = tag.equals("'?'") || tag.equals("?");
      sparseValues[last] = unknownTag ? Instance.missingValue() : tags.indexOfValue(tag);
      dataset.add(new SparseInstance(1, sparseValues, sparseIndices, totalSlots));
    }
    if (current.getTag().size() == 0) {
      sparseValues[last] = Instance.missingValue();
      dataset.add(new SparseInstance(1, sparseValues, sparseIndices, totalSlots));
    }
  }
  return dataset;
}