Example usage for the AddValues() constructor of weka.filters.unsupervised.attribute.AddValues

List of usage examples for the AddValues() constructor of weka.filters.unsupervised.attribute.AddValues

Introduction

On this page you can find example usage of the AddValues() constructor of weka.filters.unsupervised.attribute.AddValues.

Prototype

AddValues

Source Link

Usage

From source file: gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

/**
 * Builds a Weka dataset containing one instance per line of the given documents.
 *
 * <p>The nominal {@code DOC_ID} attribute of {@code header} is first extended
 * (via the {@link AddValues} filter) with the names of all passed-in documents.
 * Each line then becomes an instance whose feature values are looked up by
 * attribute name in the line's features merged with its document's features
 * (document values win on a name collision).
 *
 * <p>A line without a label gets a missing class value, and an attribute for
 * which no feature value is available is left missing.
 *
 * @param header    dataset structure to build on; its class attribute is read,
 *                  so it is expected to have a class index assigned
 * @param documents documents whose lines become instances
 * @return a new dataset with the extended header and one instance per line
 * @throws Exception if the {@code AddValues} filter cannot be configured or applied
 */
@SuppressWarnings("unchecked")
public static Instances createDataset(final Instances header, final Collection<Document> documents)
        throws Exception {

    // Update header to include all docIDs from the passed in documents
    // (Weka requires all values for nominal features)
    final Set<String> docIds = new TreeSet<String>();

    for (Document document : documents) {
        docIds.add(document.getName());
    }

    final AddValues avf = new AddValues();
    avf.setLabels(StringUtils.join(docIds, ","));

    // AddValues takes the attribute index as a 1-based string (it is parsed by
    // SingleIndex), whereas Attribute.index() is 0-based -- hence the + 1.
    // Calling path: AddValues.setInputFormat() -->
    //               SingleIndex.setUpper() -->
    //               SingleIndex.setValue()
    avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1));

    avf.setInputFormat(header);
    final Instances newHeader = Filter.useFilter(header, avf);

    final Instances instances = new Instances(newHeader, documents.size());

    // Map attributes by name. enumerateAttributes() skips the class attribute,
    // so it is added explicitly afterwards.
    final Map<String, Attribute> attrMap = new HashMap<String, Attribute>();

    final Enumeration<Attribute> en = newHeader.enumerateAttributes();

    while (en.hasMoreElements()) {
        final Attribute attr = en.nextElement();

        attrMap.put(attr.name(), attr);
    }

    attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute());

    final Attribute docId = attrMap.get(DOC_ID);
    final Attribute lineId = attrMap.get(LINE_ID);
    final Attribute classAttr = attrMap.get(LABEL);

    // Add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attrMap.size());

            // Merge into a local copy so the line's own feature map is not
            // modified as a side effect of building the dataset.
            final Map<String, Object> features = new HashMap<String, Object>(line.getFeatures());
            features.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());

            if (line.getLabel() == null) {
                instance.setMissing(classAttr);
            } else {
                instance.setValue(classAttr, line.getLabel());
            }

            for (Attribute attribute : attrMap.values()) {
                if (attribute.equals(docId) || attribute.equals(lineId) || attribute.equals(classAttr)) {
                    continue; // already populated above
                }

                final Object obj = features.get(attribute.name());

                if (obj == null) {
                    // No value available for this attribute: record it as
                    // missing instead of failing on obj.toString().
                    instance.setMissing(attribute);
                } else if (obj instanceof Double) {
                    instance.setValue(attribute, ((Double) obj).doubleValue());
                } else if (obj instanceof Integer) {
                    instance.setValue(attribute, ((Integer) obj).doubleValue());
                } else {
                    instance.setValue(attribute, obj.toString());
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attrMap.size() - 1);

    return instances;
}

From source file: wekimini.DataManager.java

/**
 * Extends the nominal attribute at position
 * {@code numMetaData + numInputs + index} with the labels
 * {@code numClasses[index] + 1} through {@code newNumClasses}, in both
 * {@code allInstances} and {@code dummyInstances}.
 *
 * <p>Both datasets are filtered first and only replaced once both filter
 * runs have succeeded, so a failure leaves the two fields untouched and
 * consistent with each other. Failures are logged and the method returns
 * without changing anything.
 *
 * @param index         position of the affected attribute relative to the
 *                      metadata and input attributes
 * @param newNumClasses new highest class label to make available
 */
private void updateInstancesForNewHigherMaxClass(int index, int newNumClasses) {
    // Build the comma-separated list of labels to add: every value above the
    // current maximum up to and including the new maximum.
    int oldMaxClasses = numClasses[index];
    StringBuilder sb = new StringBuilder();
    for (int i = oldMaxClasses + 1; i < newNumClasses; i++) {
        sb.append(Integer.toString(i)).append(",");
    }
    sb.append(Integer.toString(newNumClasses));

    AddValues a = new AddValues();
    Instances newAll;
    Instances newD;
    try {
        // AddValues expects a 1-based attribute index, hence the + 1.
        a.setAttributeIndex(Integer.toString(numMetaData + numInputs + index + 1));
        a.setLabels(sb.toString());
        a.setSort(false);
        a.setInputFormat(allInstances);
        newAll = Filter.useFilter(allInstances, a);
        // The same configured filter is reused for the dummy instances.
        newD = Filter.useFilter(dummyInstances, a);
    } catch (Exception ex) {
        Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, ex);
        return;
    }

    if (newAll.numInstances() != allInstances.numInstances()) {
        logger.log(Level.SEVERE, "Problem: deleted instances when adding class values");
    }

    // Swap both datasets in together so their headers never diverge.
    allInstances = newAll;
    dummyInstances = newD;
}