de.upb.timok.utils.DatasetTransformationUtils.java Source code

Java tutorial

Introduction

Here is the source code for de.upb.timok.utils.DatasetTransformationUtils.java

Source

/*******************************************************************************
 * This file is part of PDTTA, a library for learning Probabilistic deterministic timed-transition Automata.
 * Copyright (C) 2013-2015  Timo Klerx
 * 
 * PDTTA is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
 * 
 * PDTTA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along with PDTTA.  If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package de.upb.timok.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import jsat.DataSet;
import jsat.SimpleDataSet;
import jsat.classifiers.DataPoint;
import jsat.linear.DenseVector;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

/**
 * Static helpers that convert between plain {@code double[]} feature vectors, Weka {@link Instances} and JSAT {@link DataSet} objects.
 */
public class DatasetTransformationUtils {
    private static final Logger logger = LoggerFactory.getLogger(DatasetTransformationUtils.class);

    /** The single label of the nominal class attribute that is appended to every Weka dataset built here. */
    private static final String NORMAL_CLASS_LABEL = "normal";

    /**
     * Converts a list of feature vectors into a Weka dataset named {@code "trainingSet"}.
     * <p>
     * One numeric attribute (named {@code "0"}, {@code "1"}, ...) is created per entry of the first vector, followed by a nominal {@code "class"} attribute
     * whose only value is {@code "normal"}. Every vector is copied and extended by one trailing {@code 0} for the class attribute.
     * <p>
     * An empty list yields an empty dataset that contains only the class attribute (and logs a warning) instead of failing on the first element.
     *
     * @param trainingSet
     *            the feature vectors; the attribute count is taken from the first vector
     * @return a dataset with the class index set to the last attribute and one instance (weight 1) per vector
     */
    public static Instances trainingSetToInstances(List<double[]> trainingSet) {
        if (trainingSet.isEmpty()) {
            logger.warn("TrainingSet has size 0");
        }
        final Instances result = createDatasetHeader("trainingSet", featureCount(trainingSet), trainingSet.size());
        for (final double[] sample : trainingSet) {
            // Arrays.copyOf zero-pads the extra slot, so the class value is 0 (presumably the index of "normal").
            final double[] labelled = Arrays.copyOf(sample, sample.length + 1);
            final Instance wekaInstance = new DenseInstance(1, labelled);
            wekaInstance.setDataset(result);
            result.add(wekaInstance);
        }
        return result;
    }

    /**
     * Converts a list of feature vectors into a Weka dataset named {@code "testSet"}.
     * <p>
     * The header is built exactly as in {@link #trainingSetToInstances(List)} (numeric attributes plus a nominal {@code "class"} attribute), but the vectors
     * are handed to {@link DenseInstance} as they are, without appending a class value.
     * <p>
     * An empty list logs a warning and yields an empty dataset that contains only the class attribute.
     *
     * @param testSet
     *            the feature vectors; the attribute count is taken from the first vector
     * @return a dataset with the class index set to the last attribute and one instance (weight 1) per vector
     */
    public static Instances testSetToInstances(List<double[]> testSet) {
        if (testSet.isEmpty()) {
            logger.warn("TestSet has size 0");
        }
        final Instances result = createDatasetHeader("testSet", featureCount(testSet), testSet.size());
        for (final double[] sample : testSet) {
            // NOTE(review): unlike the training set, the row is not padded with a class value although the header declares a class
            // attribute. Kept as-is because the callers' expectations are not visible here -- confirm whether padding is needed.
            final Instance wekaInstance = new DenseInstance(1, sample);
            wekaInstance.setDataset(result);
            result.add(wekaInstance);
        }
        return result;
    }

    /**
     * Extracts the attribute values of every instance as a {@code double[]}.
     *
     * @param instances
     *            the Weka dataset to read
     * @param chopClassAttribute
     *            if {@code true}, the last value of every row is dropped
     * @return one array per instance, in dataset order
     */
    public static List<double[]> instancesToDoubles(Instances instances, boolean chopClassAttribute) {
        final List<double[]> result = new ArrayList<>(instances.size());
        for (final Instance instance : instances) {
            double[] values = instance.toDoubleArray();
            if (chopClassAttribute) {
                // NOTE(review): drops the LAST value, i.e. assumes the class attribute is the last attribute -- confirm against callers.
                values = Arrays.copyOfRange(values, 0, values.length - 1);
            }
            result.add(values);
        }
        return result;
    }

    /**
     * Wraps every feature vector in a JSAT {@link DataPoint} and collects them in a {@link SimpleDataSet}.
     *
     * @param doubleVectors
     *            the feature vectors to wrap
     * @return a data set with one data point per vector, in list order
     */
    public static DataSet doublesToDataSet(List<double[]> doubleVectors) {
        final List<DataPoint> dataPoints = new ArrayList<>(doubleVectors.size());
        for (final double[] sample : doubleVectors) {
            dataPoints.add(new DataPoint(new DenseVector(sample)));
        }
        return new SimpleDataSet(dataPoints);
    }

    /** Returns the length of the first vector, or 0 if the list is empty. */
    private static int featureCount(List<double[]> vectors) {
        return vectors.isEmpty() ? 0 : vectors.get(0).length;
    }

    /** Creates an empty dataset with {@code numFeatures} numeric attributes followed by the nominal class attribute, which is set as class index. */
    private static Instances createDatasetHeader(String relationName, int numFeatures, int capacity) {
        final ArrayList<Attribute> attributes = new ArrayList<>(numFeatures + 1);
        for (int i = 0; i < numFeatures; i++) {
            attributes.add(new Attribute(Integer.toString(i)));
        }
        final ArrayList<String> classValues = new ArrayList<>(1);
        classValues.add(NORMAL_CLASS_LABEL);
        attributes.add(new Attribute("class", classValues));

        final Instances dataset = new Instances(relationName, attributes, capacity);
        dataset.setClassIndex(attributes.size() - 1);
        return dataset;
    }

}