Example usage for weka.core Attribute addStringValue

List of usage examples for weka.core Attribute addStringValue

Introduction

This page collects usage examples for the method weka.core.Attribute.addStringValue.

Prototype


public int addStringValue(String value) 

Source Link

Document

Adds a string value to the list of valid strings for attributes of type STRING and returns the index of the string.

Usage

From source file:adams.ml.data.WekaConverter.java

License:Open Source License

/**
 * Turns an ADAMS dataset row into a Weka Instance.
 *
 * @param data   the dataset to use as template
 * @param row      the row to convert/*from w ww  . j  ava2  s. c  o  m*/
 * @return      the generated instance
 * @throws Exception   if conversion fails
 */
public static Instance toInstance(Instances data, Row row) throws Exception {
    Instance result;
    double[] values;
    int i;
    Cell cell;
    Attribute att;

    values = new double[data.numAttributes()];
    for (i = 0; i < data.numAttributes(); i++) {
        values[i] = Utils.missingValue();

        if (!row.hasCell(i))
            continue;
        cell = row.getCell(i);
        if (cell.isMissing())
            continue;

        att = data.attribute(i);
        switch (att.type()) {
        case Attribute.NUMERIC:
            values[i] = cell.toDouble();
            break;
        case Attribute.DATE:
            values[i] = cell.toAnyDateType().getTime();
            break;
        case Attribute.NOMINAL:
            values[i] = att.indexOfValue(cell.getContent());
            break;
        case Attribute.STRING:
            values[i] = att.addStringValue(cell.getContent());
            break;
        default:
            throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(att));
        }
    }

    result = new DenseInstance(1.0, values);
    result.setDataset(data);

    return result;
}

From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java

License:Apache License

/**
 * Method that converts a text message into an instance.
 *
 * @param text the message content to convert
 * @param data the header information/*from   ww w .j  av  a 2s  . co m*/
 * @return the generated Instance
 */
private Instance makeInstance(String text, Instances data) {
    Instance instance = new Instance(2);
    Attribute messageAtt = data.attribute("content");
    instance.setValue(messageAtt, messageAtt.addStringValue(text));
    instance.setDataset(data);
    return instance;
}

From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java

License:Open Source License

/**
 * FAST HACK REMOVING FUNCTIONALITIES FROM WEKA ORIGINAL METHOD!
 * /*ww  w.ja v  a2 s .co m*/
 * @param rs
 * @return
 * @throws SQLException
 */
public static Instances retrieveInstanceFromResultSet(ResultSet rs) throws SQLException {

    ResultSetMetaData md = rs.getMetaData();

    // Determine structure of the instances
    int numAttributes = md.getColumnCount();
    int[] attributeTypes = new int[numAttributes];
    Hashtable[] nominalIndexes = new Hashtable[numAttributes];
    FastVector[] nominalStrings = new FastVector[numAttributes];
    for (int i = 1; i <= numAttributes; i++) {
        attributeTypes[i - 1] = Attribute.NUMERIC;
    }

    // For sqlite
    // cache column names because the last while(rs.next()) { iteration for
    // the tuples below will close the md object:
    Vector<String> columnNames = new Vector<String>();
    for (int i = 0; i < numAttributes; i++) {
        columnNames.add(md.getColumnName(i + 1));
    }

    // Step through the tuples
    FastVector instances = new FastVector();
    int rowCount = 0;
    while (rs.next()) {

        double[] vals = new double[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {

            int in = rs.getInt(i);
            if (rs.wasNull()) {
                vals[i - 1] = Instance.missingValue();
            } else {
                vals[i - 1] = in;
            }
            Instance newInst = new Instance(1.0, vals);
            instances.addElement(newInst);
            rowCount++;
        }
    }
    // disconnectFromDatabase(); (perhaps other queries might be made)

    // Create the header and add the instances to the dataset
    FastVector attribInfo = new FastVector();
    for (int i = 0; i < numAttributes; i++) {
        /* Fix for databases that uppercase column names */
        // String attribName = attributeCaseFix(md.getColumnName(i + 1));
        String attribName = columnNames.get(i);
        switch (attributeTypes[i]) {
        case Attribute.NOMINAL:
            attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
            break;
        case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
        case Attribute.STRING:
            Attribute att = new Attribute(attribName, (FastVector) null);
            attribInfo.addElement(att);
            for (int n = 0; n < nominalStrings[i].size(); n++) {
                att.addStringValue((String) nominalStrings[i].elementAt(n));
            }
            break;
        case Attribute.DATE:
            attribInfo.addElement(new Attribute(attribName, (String) null));
            break;
        default:
            throw new SQLException("Unknown attribute type");
        }
    }

    Instances result = new Instances("QueryResult", attribInfo, instances.size());
    for (int i = 0; i < instances.size(); i++) {
        result.add((Instance) instances.elementAt(i));
    }

    rs.close();

    return result;

}

From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java

License:Open Source License

/**
 * Given a (possibly empty) Instances object containing the required weka Attributes, generates a weka Instance for a
 * single data point.
 * <p>
 * A feature is stored as a missing value when the data point does not have it, when its text is "?", or when it
 * cannot be represented by the attribute (unknown nominal label, unparseable number, unsupported attribute type).
 * The latter cases are additionally reported through AuToBIUtils.error.
 *
 * @param instances  the weka Instances object containing attributes
 * @param data_point the data point to convert
 * @return a weka instance with assigned attributes
 */
protected static Instance assignWekaAttributes(Instances instances, Word data_point) {
    double[] instance = new double[instances.numAttributes()];

    for (int i = 0; i < instances.numAttributes(); ++i) {
        Attribute attribute = instances.attribute(i);

        // Start out as missing so that every failure path below leaves a well-defined value
        // instead of the array's implicit 0.0.
        instance[i] = Utils.missingValue();

        if (!data_point.hasAttribute(attribute.name())) {
            continue;
        }
        String value = data_point.getAttribute(attribute.name()).toString();
        if (value.equals("?")) {
            continue;
        }

        switch (attribute.type()) {
        case Attribute.NOMINAL:
            int index = attribute.indexOfValue(value);
            if (index < 0) {
                // A negative index is not a valid nominal value; keep the feature missing.
                AuToBIUtils.error("Unknown value '" + value + "' for nominal feature: " + attribute.name());
            } else {
                instance[i] = (double) index;
            }
            break;
        case Attribute.NUMERIC:
            // Check if value is really a number.
            try {
                instance[i] = Double.valueOf(value);
            } catch (NumberFormatException e) {
                AuToBIUtils.error("Number expected for feature: " + attribute.name());
            }
            break;
        case Attribute.STRING:
            instance[i] = attribute.addStringValue(value);
            break;
        default:
            AuToBIUtils.error("Unknown attribute type");
        }
    }

    Instance inst = new DenseInstance(1, instance);
    inst.setDataset(instances);
    return inst;
}

From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java

License:Open Source License

/**
 * Derives the output format from the input format and registers it.
 * <p>
 * Attributes outside the selected columns, and attributes for which
 * indexOfValue already finds the replacement value, are copied unchanged.
 * Selected nominal attributes are rebuilt with the replacement value appended
 * to their labels; all remaining selected attributes are copied and the
 * replacement value is added through addStringValue.
 */
private void setOutputFormat() {

    final FastVector attributeDefs = new FastVector();
    final BitSet sourceFlags = new BitSet();

    for (int col = 0; col < getInputFormat().numAttributes(); col++) {

        final Attribute original = getInputFormat().attribute(col);
        final boolean keepAsIs = !m_Columns.isInRange(col) || original.indexOfValue(m_ReplVal) >= 0;
        final Attribute converted;

        if (!keepAsIs && original.isNominal()) {
            // nominal attribute: existing labels followed by the replacement value
            final ArrayList<String> labels = new ArrayList<String>();
            for (Enumeration<String> e = original.enumerateValues(); e.hasMoreElements();) {
                labels.add(e.nextElement());
            }
            labels.add(m_ReplVal);

            converted = new Attribute(original.name(), labels);
        } else {
            converted = (Attribute) original.copy();
            if (!keepAsIs) { // string attributes
                converted.addStringValue(m_ReplVal);
            }
        }

        attributeDefs.addElement(converted);
        sourceFlags.set(col);
    }

    final Instances format = new Instances(getInputFormat().relationName(), attributeDefs, 0);
    format.setClassIndex(getInputFormat().classIndex());

    setOutputFormat(format);

    m_StringToCopy = new AttributeLocator(getInputFormat(), Attribute.STRING, MathUtils.findTrue(sourceFlags));
}

From source file:form.ml.ClassifierTemplate.java

/**
 * Builds a Weka Instance that carries the given text in the "text"
 * attribute of the training data.
 *
 * @param text the String to be converted
 * @return Instance object
 */
private Instance makeInstance(String text) {
    final Instance wrapped = new Instance(2);
    final Attribute textAttribute = train.attribute("text");

    // Register the text with the attribute and store the index it was filed under.
    final int textIndex = textAttribute.addStringValue(text);
    wrapped.setValue(textAttribute, textIndex);

    // Bind the instance to the training data so it can resolve its attributes.
    wrapped.setDataset(train);
    return wrapped;
}

From source file:py.fpuna.lib.ExtendedInstanceQuery.java

License:Open Source License

/**
 * Makes a database query to convert a table into a set of instances
 * <p>
 * Column types are resolved from the column type name via
 * translateDBColumnType. STRING and BOOL columns become nominal attributes,
 * TEXT columns become string attributes, the numeric types become numeric
 * attributes and DATE/TIME columns become date attributes. Columns of any
 * other type become string attributes whose values are all missing.
 *
 * @param query the query to convert to instances
 * @return the instances contained in the result of the query, NULL if the
 * SQL query doesn't return a ResultSet, e.g., DELETE/INSERT/UPDATE
 * @throws Exception if an error occurs
 */
public Instances retrieveInstances(String query) throws Exception {

    if (m_Debug)
        System.err.println("Executing query: " + query);
    connectToDatabase();
    if (!execute(query)) {
        if (m_PreparedStatement.getUpdateCount() == -1) {
            throw new Exception("Query didn't produce results");
        } else {
            if (m_Debug)
                System.err.println(m_PreparedStatement.getUpdateCount() + " rows affected.");
            close();
            return null;
        }
    }
    ResultSet rs = getResultSet();
    if (m_Debug)
        System.err.println("Getting metadata...");
    ResultSetMetaData md = rs.getMetaData();
    if (m_Debug)
        System.err.println("Completed getting metadata...");

    // Determine structure of the instances
    int numAttributes = md.getColumnCount();
    int[] attributeTypes = new int[numAttributes];
    // per column: label -> index lookup and labels in index order
    // (only allocated for nominal and string columns)
    Hashtable[] nominalIndexes = new Hashtable[numAttributes];
    FastVector[] nominalStrings = new FastVector[numAttributes];
    for (int i = 1; i <= numAttributes; i++) {
        switch (translateDBColumnType(md.getColumnTypeName(i))) {

        case STRING:
            // String --> nominal
            attributeTypes[i - 1] = Attribute.NOMINAL;
            nominalIndexes[i - 1] = new Hashtable();
            nominalStrings[i - 1] = new FastVector();
            break;
        case TEXT:
            // Text --> string
            attributeTypes[i - 1] = Attribute.STRING;
            nominalIndexes[i - 1] = new Hashtable();
            nominalStrings[i - 1] = new FastVector();
            break;
        case BOOL:
            // boolean --> nominal with the fixed labels "false" (0) and "true" (1)
            attributeTypes[i - 1] = Attribute.NOMINAL;
            nominalIndexes[i - 1] = new Hashtable();
            nominalIndexes[i - 1].put("false", new Double(0));
            nominalIndexes[i - 1].put("true", new Double(1));
            nominalStrings[i - 1] = new FastVector();
            nominalStrings[i - 1].addElement("false");
            nominalStrings[i - 1].addElement("true");
            break;
        case DOUBLE:
            // BigDecimal --> numeric
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
        case BYTE:
            // byte --> numeric
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
        case SHORT:
            // short --> numeric
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
        case INTEGER:
            // int --> numeric
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
        case LONG:
            // long --> numeric
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
        case FLOAT:
            // float --> numeric
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
        case DATE:
            attributeTypes[i - 1] = Attribute.DATE;
            break;
        case TIME:
            attributeTypes[i - 1] = Attribute.DATE;
            break;
        default:
            // Unknown column type --> string attribute. The values of such
            // columns are stored as missing below, but the header construction
            // iterates nominalStrings for every string attribute, so the
            // (empty) containers must exist to avoid a NullPointerException.
            attributeTypes[i - 1] = Attribute.STRING;
            nominalIndexes[i - 1] = new Hashtable();
            nominalStrings[i - 1] = new FastVector();
        }
    }

    // For sqlite
    // cache column names because the last while(rs.next()) { iteration for
    // the tuples below will close the md object:
    Vector<String> columnNames = new Vector<String>();
    for (int i = 0; i < numAttributes; i++) {
        columnNames.add(md.getColumnLabel(i + 1));
    }

    // Step through the tuples
    if (m_Debug)
        System.err.println("Creating instances...");
    FastVector instances = new FastVector();
    int rowCount = 0;
    while (rs.next()) {
        if (rowCount % 100 == 0) {
            if (m_Debug) {
                System.err.print("read " + rowCount + " instances \r");
                System.err.flush();
            }
        }
        double[] vals = new double[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {
            switch (translateDBColumnType(md.getColumnTypeName(i))) {
            case STRING:
                String str = rs.getString(i);

                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    // look up the label, registering it on first occurrence
                    Double index = (Double) nominalIndexes[i - 1].get(str);
                    if (index == null) {
                        index = new Double(nominalStrings[i - 1].size());
                        nominalIndexes[i - 1].put(str, index);
                        nominalStrings[i - 1].addElement(str);
                    }
                    vals[i - 1] = index.doubleValue();
                }
                break;
            case TEXT:
                String txt = rs.getString(i);

                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    Double index = (Double) nominalIndexes[i - 1].get(txt);
                    if (index == null) {

                        // Need to add one because first value in
                        // string attribute is dummy value.
                        index = new Double(nominalStrings[i - 1].size()) + 1;
                        nominalIndexes[i - 1].put(txt, index);
                        nominalStrings[i - 1].addElement(txt);
                    }
                    vals[i - 1] = index.doubleValue();
                }
                break;
            case BOOL:
                boolean boo = rs.getBoolean(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    vals[i - 1] = (boo ? 1.0 : 0.0);
                }
                break;
            case DOUBLE:
                double dd = rs.getDouble(i);
                // Use the column precision instead of 4?
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    vals[i - 1] = dd;
                }
                break;
            case BYTE:
                byte by = rs.getByte(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    vals[i - 1] = (double) by;
                }
                break;
            case SHORT:
                short sh = rs.getShort(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    vals[i - 1] = (double) sh;
                }
                break;
            case INTEGER:
                int in = rs.getInt(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    vals[i - 1] = (double) in;
                }
                break;
            case LONG:
                long lo = rs.getLong(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    vals[i - 1] = (double) lo;
                }
                break;
            case FLOAT:
                float fl = rs.getFloat(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    vals[i - 1] = (double) fl;
                }
                break;
            case DATE:
                Date date = rs.getDate(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    // TODO: Do a value check here.
                    vals[i - 1] = (double) date.getTime();
                }
                break;
            case TIME:
                Time time = rs.getTime(i);
                if (rs.wasNull()) {
                    vals[i - 1] = Instance.missingValue();
                } else {
                    // TODO: Do a value check here.
                    vals[i - 1] = (double) time.getTime();
                }
                break;
            default:
                // values of unknown column types are not read
                vals[i - 1] = Instance.missingValue();
            }
        }
        Instance newInst;
        if (m_CreateSparseData) {
            newInst = new SparseInstance(1.0, vals);
        } else {
            newInst = new Instance(1.0, vals);
        }
        instances.addElement(newInst);
        rowCount++;
    }
    //disconnectFromDatabase();  (perhaps other queries might be made)

    // Create the header and add the instances to the dataset
    if (m_Debug)
        System.err.println("Creating header...");
    FastVector attribInfo = new FastVector();
    for (int i = 0; i < numAttributes; i++) {
        /* Fix for databases that uppercase column names */
        String attribName = attributeCaseFix(columnNames.get(i));
        switch (attributeTypes[i]) {
        case Attribute.NOMINAL:
            attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
            break;
        case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
        case Attribute.STRING:
            Attribute att = new Attribute(attribName, (FastVector) null);
            attribInfo.addElement(att);
            // register the collected strings in the order they were encountered
            for (int n = 0; n < nominalStrings[i].size(); n++) {
                att.addStringValue((String) nominalStrings[i].elementAt(n));
            }
            break;
        case Attribute.DATE:
            attribInfo.addElement(new Attribute(attribName, (String) null));
            break;
        default:
            throw new Exception("Unknown attribute type");
        }
    }
    Instances result = new Instances("QueryResult", attribInfo, instances.size());
    for (int i = 0; i < instances.size(); i++) {
        result.add((Instance) instances.elementAt(i));
    }
    close(rs);

    return result;
}

From source file:sg.edu.nus.comp.nlp.ims.io.CWekaLexeltWriter.java

License:Open Source License

/**
 * Converts a lexelt into a dense Weka dataset.
 * <p>
 * The dataset has the attribute "#ID" first, followed by one attribute per
 * feature key of the lexelt's statistic (numeric for numeric feature types,
 * nominal otherwise) and the nominal attribute "#TAG" last. One Weka
 * instance is added per tag of every lexelt instance; lexelt instances
 * without tags contribute a single instance with a missing tag.
 *
 * @param p_Lexelt the lexelt to convert
 * @return the generated weka Instances object
 * @throws ClassNotFoundException if a feature type named by the statistic
 *         cannot be loaded
 * @throws IllegalStateException if a non-numeric feature key has no values
 */
@Override
public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
    String relation = p_Lexelt.getID();
    FastVector attributes = new FastVector();
    int capacity = p_Lexelt.size();

    IStatistic stat = p_Lexelt.getStatistic();
    // NOTE(review): "#ID" is created with the name-only constructor; if that
    // yields a non-string attribute, the addStringValue/indexOfValue calls
    // below will not register the instance IDs - confirm the intended type.
    Attribute ids = new Attribute("#ID");
    attributes.addElement(ids);
    int keySize = stat.getKeys().size();
    for (int keyIdx = 0; keyIdx < keySize; keyIdx++) {
        String key = stat.getKey(keyIdx);
        String type = stat.getType(keyIdx);
        if (ANumericFeature.class.isAssignableFrom(Class.forName(type))) {
            attributes.addElement(new Attribute(key));
        } else {
            FastVector attributeValues = new FastVector();
            List<String> values = stat.getValue(keyIdx);
            for (String value : values) {
                attributeValues.addElement(value);
            }
            if (attributeValues.size() == 0) {
                throw new IllegalStateException("No attribute specified.");
            }
            attributes.addElement(new Attribute(key, attributeValues));
        }
    }
    FastVector attributeValues = new FastVector();
    for (String tag : stat.getTags()) {
        attributeValues.addElement(tag);
    }
    attributes.addElement(new Attribute("#TAG", attributeValues));

    Instances instances = new Instances(relation, attributes, capacity);
    for (int instIdx = 0; instIdx < p_Lexelt.size(); instIdx++) {
        IInstance instance = p_Lexelt.getInstance(instIdx);
        int keyIdx = 0;
        double value;
        IFeature feature;

        // Encode the features that are present; attribute index = key index + 1
        // because attribute 0 is "#ID".
        int featureSize = instance.size();
        Hashtable<Integer, Double> features = new Hashtable<Integer, Double>();
        ArrayList<Integer> exist = new ArrayList<Integer>();
        for (int featIdx = 0; featIdx < featureSize; featIdx++) {
            feature = instance.getFeature(featIdx);
            keyIdx = stat.getIndex(feature.getKey());
            if (keyIdx < 0) {
                // feature key is not part of the statistic
                continue;
            }
            if (ANumericFeature.class.isInstance(feature)) {
                value = Double.parseDouble(feature.getValue());
            } else if (ABinaryFeature.class.isInstance(feature)) {
                value = instances.attribute(keyIdx + 1).indexOfValue(feature.getValue());
            } else {
                String fv = feature.getValue();
                if (fv == null || !stat.contains(keyIdx, fv)) {
                    fv = stat.getDefaultValue();
                }
                value = instances.attribute(keyIdx + 1).indexOfValue(fv);
            }
            features.put(keyIdx + 1, value);
            exist.add(keyIdx + 1);
        }
        // sentinel: the tag attribute terminates the last gap to fill
        exist.add(keySize + 1);
        Collections.sort(exist);

        double[] attValues = new double[keySize + 2];
        ids.addStringValue(instance.getID());
        attValues[0] = ids.indexOfValue(instance.getID());
        // Walk the sorted attribute indices: attributes in the gaps between
        // present features get a default (0 for numeric attributes, the index
        // of label "0" otherwise), present features get their encoded value.
        // NOTE(review): the first pass starts at attribute 0, so the "#ID"
        // value assigned above is overwritten by the default - confirm that
        // this is intended.
        int begin, end = -1;
        for (int valueIdx = 0; valueIdx < exist.size(); valueIdx++) {
            begin = end + 1;
            end = exist.get(valueIdx);
            for (int i = begin; i < end; i++) {
                if (instances.attribute(i).isNumeric()) {
                    attValues[i] = 0;
                } else {
                    attValues[i] = instances.attribute(i).indexOfValue("0");
                }
            }
            if (end <= keySize) {
                attValues[end] = features.get(end);
            }
        }

        for (String tag : instance.getTag()) {
            if (tag.equals("'?'") || tag.equals("?")) {
                attValues[keySize + 1] = Instance.missingValue();
            } else {
                attValues[keySize + 1] = instances.attribute(keySize + 1).indexOfValue(tag);
            }
            // The dense Instance constructor stores the array by reference and
            // Instances.add only copies shallowly (per the Weka API docs), so
            // each tag needs its own copy of the values; otherwise all
            // instances of a multi-tag item would end up with the last tag.
            Instance ins = new Instance(1, attValues.clone());
            instances.add(ins);
        }
        if (instance.getTag().size() == 0) {
            attValues[keySize + 1] = Instance.missingValue();
            Instance ins = new Instance(1, attValues);
            instances.add(ins);
        }
    }
    return instances;
}

From source file:sg.edu.nus.comp.nlp.ims.io.CWekaSparseLexeltWriter.java

License:Open Source License

/**
 * Converts a lexelt into a Weka dataset made of sparse instances.
 * <p>
 * The dataset has the attribute "#ID" first, followed by one attribute per
 * feature key of the lexelt's statistic (numeric for numeric feature types,
 * nominal otherwise) and the nominal attribute "#TAG" last. Only the ID, the
 * features that are present and the tag are handed to each sparse instance.
 * One instance is added per tag; lexelt instances without tags contribute a
 * single instance with a missing tag.
 *
 * @param p_Lexelt the lexelt to convert
 * @return the generated weka Instances object
 * @throws ClassNotFoundException if a feature type named by the statistic
 *         cannot be loaded
 */
@Override
public Object getInstances(ILexelt p_Lexelt) throws ClassNotFoundException {
    final String relationName = p_Lexelt.getID();
    final FastVector header = new FastVector();
    final int initialCapacity = p_Lexelt.size();
    final IStatistic statistic = p_Lexelt.getStatistic();

    // Attribute 0: the instance identifier.
    final Attribute idAttribute = new Attribute("#ID");
    header.addElement(idAttribute);

    // Attributes 1..featureCount: one per feature key.
    final int featureCount = statistic.getKeys().size();
    for (int k = 0; k < featureCount; k++) {
        final String key = statistic.getKey(k);
        final String type = statistic.getType(k);
        if (ANumericFeature.class.isAssignableFrom(Class.forName(type))) {
            header.addElement(new Attribute(key));
            continue;
        }
        final FastVector labels = new FastVector();
        final List<String> declared = statistic.getValue(k);
        for (String label : declared) {
            labels.addElement(label);
        }
        if (labels.size() == 0) {
            throw new IllegalStateException("No attribute specified.");
        }
        header.addElement(new Attribute(key, labels));
    }

    // Last attribute: the tag.
    final FastVector tagLabels = new FastVector();
    for (String tag : statistic.getTags()) {
        tagLabels.addElement(tag);
    }
    header.addElement(new Attribute("#TAG", tagLabels));

    final int tagColumn = featureCount + 1;
    final int columnCount = featureCount + 2;
    final Instances dataset = new Instances(relationName, header, initialCapacity);

    for (int n = 0; n < p_Lexelt.size(); n++) {
        final IInstance source = p_Lexelt.getInstance(n);

        // Encode the features that are present, keyed by attribute index
        // (key index + 1 because attribute 0 is "#ID").
        final int featureTotal = source.size();
        final Hashtable<Integer, Double> valueByColumn = new Hashtable<Integer, Double>();
        final ArrayList<Integer> presentColumns = new ArrayList<Integer>();
        for (int f = 0; f < featureTotal; f++) {
            final IFeature feature = source.getFeature(f);
            final int keyIndex = statistic.getIndex(feature.getKey());
            if (keyIndex < 0) {
                // feature key is not part of the statistic
                continue;
            }
            final int column = keyIndex + 1;
            final double encoded;
            if (ANumericFeature.class.isInstance(feature)) {
                encoded = Double.parseDouble(feature.getValue());
            } else if (ABinaryFeature.class.isInstance(feature)) {
                encoded = dataset.attribute(column).indexOfValue(feature.getValue());
            } else {
                String label = feature.getValue();
                if (label == null || !statistic.contains(keyIndex, label)) {
                    label = statistic.getDefaultValue();
                }
                encoded = dataset.attribute(column).indexOfValue(label);
            }
            valueByColumn.put(column, encoded);
            presentColumns.add(column);
        }
        Collections.sort(presentColumns);

        // Slot layout: [0] = ID, [1..present] = features in ascending
        // attribute order, [last] = tag.
        final int tagSlot = presentColumns.size() + 1;
        final double[] sparseValues = new double[tagSlot + 1];
        final int[] sparseIndices = new int[tagSlot + 1];

        idAttribute.addStringValue(source.getID());
        sparseValues[0] = idAttribute.indexOfValue(source.getID());
        sparseIndices[0] = 0;

        int slot = 1;
        for (Integer column : presentColumns) {
            sparseIndices[slot] = column;
            sparseValues[slot] = valueByColumn.get(column);
            slot++;
        }

        final Attribute tagAttribute = dataset.attribute(tagColumn);
        sparseIndices[tagSlot] = tagColumn;
        for (String tag : source.getTag()) {
            final boolean unknownTag = tag.equals("'?'") || tag.equals("?");
            sparseValues[tagSlot] = unknownTag ? Instance.missingValue() : tagAttribute.indexOfValue(tag);
            dataset.add(new SparseInstance(1, sparseValues, sparseIndices, columnCount));
        }
        if (source.getTag().size() == 0) {
            sparseValues[tagSlot] = Instance.missingValue();
            dataset.add(new SparseInstance(1, sparseValues, sparseIndices, columnCount));
        }
    }
    return dataset;
}