org.textmin.tubes.dtm.decorator.InstancesBuilder.java Source code

Java tutorial

Introduction

Here is the source code for org.textmin.tubes.dtm.decorator.InstancesBuilder.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.textmin.tubes.dtm.decorator;

import org.textmin.tubes.dtm.DTMatrixDecorator;
import org.textmin.tubes.dtm.DocumentTermMatrix;
import java.util.ArrayList;
import java.util.Arrays;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

/**
 * Decorator that exports the wrapped document-term matrix as a Weka
 * {@link Instances} dataset.
 *
 * @author spiralhalo
 */
public class InstancesBuilder extends DTMatrixDecorator<Double> {

    /**
     * Creates a builder on top of the given matrix.
     *
     * @param component the document-term matrix to decorate
     */
    public InstancesBuilder(DocumentTermMatrix<Double> component) {
        super(component);
    }

    /**
     * Converts the decorated matrix into a Weka dataset.
     * <p>
     * The dataset schema consists of a nominal attribute named
     * {@code __TheClass} at index 0 (also set as the class index), followed by
     * one attribute per entry of {@code termSet()}, named after that entry.
     * One instance is appended for every entry of {@code documentSet()}.
     *
     * @param rel     relation name handed to the {@link Instances} constructor
     * @param classes values allowed for the nominal class attribute
     * @return the populated dataset
     */
    public Instances buildInstances(String rel, String[] classes) {
        // Nominal class attribute; its value list is a private copy of the array.
        ArrayList<String> labelValues = new ArrayList<>(Arrays.asList(classes));
        Attribute labelAttribute = new Attribute("__TheClass", labelValues);

        // Schema layout: [class, term_1, ..., term_n].
        ArrayList<Attribute> schema = new ArrayList<>(termCount() + 1);
        schema.add(labelAttribute);
        termSet().forEach(term -> schema.add(new Attribute(term)));

        Instances dataset = new Instances(rel, schema, documentCount());
        dataset.setClassIndex(0);

        // The schema is not modified below, so its width can be read once.
        final int width = schema.size();
        for (String document : documentSet()) {
            DenseInstance row = new DenseInstance(width);
            row.setValue(labelAttribute, getClass(document));

            // Column 0 holds the class; term columns start at 1.
            for (int column = 1; column < width; column++) {
                String term = schema.get(column).name();
                row.setValue(column, getValue(document, term));
            }
            dataset.add(row);
        }

        return dataset;
    }
}