utils.DataInfoUtils.java Source code

Java tutorial

Introduction

Here is the source code for utils.DataInfoUtils.java

Source

/*
 * This file is part of the MLDA.
 *
 * (c)  Jose Maria Moyano Murillo
 *      Eva Lucrecia Gibaja Galindo
 *      Sebastian Ventura Soto <sventura@uco.es>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

package utils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
import mulan.data.LabelSet;
import mulan.data.MultiLabelInstances;
import mulan.data.Statistics;
import weka.core.Attribute;
import weka.core.Instances;

/**
 * Static helper methods for querying label information of multi-label
 * datasets: label names, label indices, appearances and frequencies.
 * 
 * @author Jose Maria Moyano Murillo
 */
public class DataInfoUtils {

    /**
     * Obtain the names of the labels that are active in a labelset
     * 
     * @param dataset Dataset
     * @param labelset Labelset as a string with one character per label; the
     *                 label at position i is selected when character i is '1'
     * @return List with the names of the selected labels, in label order
     */
    public static ArrayList<String> getLabelNamesByLabelset(MultiLabelInstances dataset, String labelset) {
        // Explicit type argument: a raw ArrayList would produce an unchecked warning.
        ArrayList<String> labelNames = new ArrayList<String>();

        for (int i = 0; i < labelset.length(); i++) {
            if (labelset.charAt(i) == '1') {
                labelNames.add(getLabelByIndex(dataset, i).name());
            }
        }

        return labelNames;
    }

    /**
     * Obtain label by index
     * 
     * @param dataset Dataset
     * @param id Position of the label within the label indices of the dataset
     *           (not the attribute index)
     * @return Attribute describing the label
     */
    public static Attribute getLabelByIndex(MultiLabelInstances dataset, int id) {
        int[] labelIndices = dataset.getLabelIndices();

        // Attributes belong to the dataset header, so look the label up there.
        // Going through one particular instance would fail on datasets that
        // are too small to contain that instance.
        return dataset.getDataSet().attribute(labelIndices[id]);
    }

    /**
     * Obtain number of appearances of each label
     * 
     * @param imbalancedData Labels as ImbalancedFeature objects
     * @return Array with the appearances of each label, in the same order as
     *         the input array
     */
    public static double[] getLabelAppearances(ImbalancedFeature[] imbalancedData) {
        double[] labelFrequency = new double[imbalancedData.length];

        for (int i = 0; i < imbalancedData.length; i++) {
            labelFrequency[i] = (double) imbalancedData[i].getAppearances();
        }

        return labelFrequency;
    }

    /**
     * Get the appearances of the labelsets grouped by labelset size
     * 
     * @param stat Statistics
     * @return Map where the key is the size of a labelset (number of labels,
     *         as given by LabelSet.size()) and the value is the sum of the
     *         appearances of all the labelsets with that size
     */
    public static HashMap<Integer, Integer> getLabelsetByValues(Statistics stat) {
        HashMap<LabelSet, Integer> combinationCounts = stat.labelCombCount();

        HashMap<Integer, Integer> labelsetsByFrequency = new HashMap<Integer, Integer>();

        for (LabelSet current : combinationCounts.keySet()) {
            int appearances = combinationCounts.get(current);
            int key = current.size();

            // Single lookup; put() overwrites, so no remove() is needed first.
            Integer accumulated = labelsetsByFrequency.get(key);
            if (accumulated == null) {
                labelsetsByFrequency.put(key, appearances);
            } else {
                labelsetsByFrequency.put(key, accumulated + appearances);
            }
        }

        return labelsetsByFrequency;
    }

    /**
     * Get label frequency given the index
     * 
     * @param dataset Dataset
     * @param labelIndex Index of the label; it is used directly as the
     *                   attribute index when reading each instance
     * @return Number of instances whose value for the label is 1, divided by
     *         the number of instances of the dataset
     */
    public static double getLabelFrequency(MultiLabelInstances dataset, int labelIndex) {
        Instances instances = dataset.getDataSet();

        double positives = 0.0;

        for (int i = 0; i < instances.size(); i++) {
            if (instances.instance(i).value(labelIndex) == 1.0) {
                positives++;
            }
        }

        return positives / dataset.getNumInstances();
    }

    /**
     * Check if the name of an attribute is in a list of names
     * 
     * @param visited Attribute names
     * @param attribute Attribute
     * @return True if the name of the attribute is in the list and false
     *         otherwise
     */
    public static boolean existsAttribute(ArrayList<String> visited, ImbalancedFeature attribute) {
        for (String current : visited) {
            if (current.equals(attribute.getName())) {
                return true;
            }
        }

        return false;
    }

    /**
     * Obtain label by label name
     * 
     * @param labelName Label name
     * @param list List of labels
     * @return First label of the list with the given name, or null if there
     *         is none
     */
    public static ImbalancedFeature getLabelByLabelname(String labelName, ImbalancedFeature[] list) {
        for (ImbalancedFeature candidate : list) {
            if (labelName.equals(candidate.getName())) {
                return candidate;
            }
        }

        return null;
    }

    /**
     * Obtain label index
     * 
     * @param labels Labels
     * @param labelName Label name
     * @return Position of the first label equal to the given name, or -1 if
     *         there is none
     */
    public static int getLabelIndex(String[] labels, String labelName) {
        for (int i = 0; i < labels.length; i++) {
            if (labelName.equals(labels[i])) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Obtain label names
     * 
     * @param labelIndices Label indices (attribute indices in the dataset)
     * @param instances Dataset
     * @return Label names, in the same order as the given indices
     */
    public static String[] getLabelNames(int[] labelIndices, Instances instances) {
        String[] labelName = new String[labelIndices.length];

        for (int i = 0; i < labelIndices.length; i++) {
            labelName[i] = instances.attribute(labelIndices[i]).name();
        }

        return labelName;
    }

    /**
     * Obtain label appearances by name
     * 
     * @param imbalancedData Labels
     * @param labelName Label name
     * @return Appearances of the first label with the given name, or -1 if
     *         there is none
     */
    public static int getLabelAppearancesByName(ImbalancedFeature[] imbalancedData, String labelName) {
        for (ImbalancedFeature candidate : imbalancedData) {
            if (candidate.getName().equals(labelName)) {
                return candidate.getAppearances();
            }
        }

        return -1;
    }

    /**
     * Obtain the label values of one instance
     * 
     * @param instances Dataset
     * @param position Position of the instance in the dataset
     * @param labelIndices Label indices (attribute indices in the dataset)
     * @return Array with the value of each label for the instance, truncated
     *         to int, in the same order as the given indices
     */
    public static int[] getCurrentValueLabels(Instances instances, int position, int[] labelIndices) {
        int[] labelsValue = new int[labelIndices.length];

        for (int i = 0; i < labelsValue.length; i++) {
            labelsValue[i] = (int) instances.instance(position).value(labelIndices[i]);
        }

        return labelsValue;
    }
}