Example usage for weka.filters.unsupervised.attribute AddValues setInputFormat

List of usage examples for weka.filters.unsupervised.attribute AddValues setInputFormat

Introduction

On this page you can find example usages of weka.filters.unsupervised.attribute AddValues setInputFormat.

Prototype

@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception 

Source Link

Document

Sets the format of the input instances.

Usage

From source file:gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

/**
 * Builds a Weka dataset containing one instance per line of every document.
 *
 * <p>The DOC_ID attribute of {@code header} is first extended (via the
 * {@link AddValues} filter) with the names of all supplied documents, because
 * a nominal attribute can only take values that were declared up front. Each
 * line then becomes a {@link DenseInstance} whose values come from the line's
 * features merged with its document's features; document features win on key
 * collisions.
 *
 * @param header    dataset header describing the attributes; it must contain
 *                  the DOC_ID, LINE_ID and LABEL attributes and have its class
 *                  attribute set
 * @param documents documents whose lines are turned into instances
 * @return a new dataset built on the extended header, with the last attribute
 *         set as the class
 * @throws Exception if the AddValues filter cannot be configured or applied
 */
@SuppressWarnings("unchecked")
public static Instances createDataset(final Instances header, final Collection<Document> documents)
        throws Exception {

    // Update header to include all docIDs from the passed in documents
    // (Weka requires all values for nominal features)
    final Set<String> docIds = new TreeSet<String>();

    for (Document document : documents) {
        docIds.add(document.getName());
    }

    final AddValues avf = new AddValues();
    avf.setLabels(StringUtils.join(docIds, ","));

    // Attribute.index() is 0-based, but the index string accepted by
    // AddValues is parsed by Weka's SingleIndex, which uses 1-based
    // indices ("first", "last", 1, 2, ...). Hence the + 1.
    avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1));

    avf.setInputFormat(header);
    final Instances newHeader = Filter.useFilter(header, avf);

    final Instances instances = new Instances(newHeader, documents.size());

    // Map attribute name -> attribute. enumerateAttributes() is followed by
    // an explicit put of the class attribute so the map is guaranteed to
    // contain it as well.
    final Map<String, Attribute> attrMap = new HashMap<String, Attribute>();

    final Enumeration<Attribute> en = newHeader.enumerateAttributes();

    while (en.hasMoreElements()) {
        final Attribute attr = en.nextElement();

        attrMap.put(attr.name(), attr);
    }

    attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute());

    final Attribute docId = attrMap.get(DOC_ID);
    final Attribute lineId = attrMap.get(LINE_ID);
    final Attribute classAttr = attrMap.get(LABEL);

    // Add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attrMap.size());

            // Merge into a private copy so that the map returned by
            // line.getFeatures() is not modified as a side effect of
            // building the dataset.
            final Map<String, Object> lineFeatures = new HashMap<String, Object>(line.getFeatures());
            lineFeatures.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());

            if (line.getLabel() == null) {
                instance.setMissing(classAttr);
            } else {
                instance.setValue(classAttr, line.getLabel());
            }

            for (Attribute attribute : attrMap.values()) {
                if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) {
                    final String name = attribute.name();
                    final Object obj = lineFeatures.get(name);

                    if (obj == null) {
                        // No value available for this attribute: record it
                        // as missing instead of failing on obj.toString().
                        instance.setMissing(attribute);
                    } else if (obj instanceof Double) {
                        instance.setValue(attribute, ((Double) obj).doubleValue());
                    } else if (obj instanceof Integer) {
                        instance.setValue(attribute, ((Integer) obj).doubleValue());
                    } else {
                        instance.setValue(attribute, obj.toString());
                    }
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attrMap.size() - 1);

    return instances;
}

From source file:wekimini.DataManager.java

/**
 * Adds the labels {@code numClasses[index] + 1 .. newNumClasses} to the
 * attribute at position {@code numMetaData + numInputs + index} in both
 * {@code allInstances} and {@code dummyInstances}.
 *
 * <p>Both datasets are filtered before either field is reassigned, so a
 * failure leaves both fields untouched rather than leaving them with
 * different headers. Failures are logged and the method returns without
 * throwing.
 *
 * @param index         position of the affected entry in {@code numClasses}
 *                      (presumably the output/class index; confirm against callers)
 * @param newNumClasses the new highest class label to make available
 */
private void updateInstancesForNewHigherMaxClass(int index, int newNumClasses) {
    int oldMaxClasses = numClasses[index];

    // Comma-separated list of the labels to add.
    StringBuilder sb = new StringBuilder();
    for (int i = oldMaxClasses + 1; i < newNumClasses; i++) {
        sb.append(Integer.toString(i)).append(",");
    }
    sb.append(Integer.toString(newNumClasses));

    AddValues a = new AddValues();
    Instances newAll;
    Instances newD;
    try {
        // AddValues takes a 1-based attribute index, hence the + 1.
        a.setAttributeIndex(Integer.toString(numMetaData + numInputs + index + 1));
        a.setLabels(sb.toString());
        a.setSort(false);
        a.setInputFormat(allInstances);
        newAll = Filter.useFilter(allInstances, a);
        newD = Filter.useFilter(dummyInstances, a);
    } catch (Exception ex) {
        logger.log(Level.SEVERE, "Could not add new class labels to instances", ex);
        return;
    }

    if (newAll.numInstances() != allInstances.numInstances()) {
        logger.log(Level.SEVERE, "Problem: number of instances changed when adding class labels");
    }

    // Commit both results together so the two datasets stay in sync.
    allInstances = newAll;
    dummyInstances = newD;
}