Example usage for weka.core Attribute toString

List of usage examples for weka.core Attribute toString

Introduction

On this page you can find example usages of weka.core.Attribute.toString().

Prototype

@Override
public final String toString() 

Source Link

Document

Returns a description of this attribute in ARFF format.

Usage

From source file:mao.datamining.DataSetPair.java

/**
 * Runs the one-time preprocessing of the training data; every call after the first
 * returns immediately because {@code didIt} is already set.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>read {@code trainSourceFileName} and log its last attribute as the class attribute;</li>
 *   <li>apply {@code RemoveUselessColumnsByMissingValues} (limit 50, plus {@code columns2Delete})
 *       and write the result to {@code afterRemoveMissingColumns1.arff};</li>
 *   <li>copy that file line by line to {@code afterRemoveRows2.arff}, dropping every line whose
 *       number of {@code '?'} characters is at least 50% of the attribute count;</li>
 *   <li>call {@code createSampleDataSets()}.</li>
 * </ol>
 *
 * <p>Any exception is passed to {@code Main.logging} and not rethrown.
 */
private void doItOnce4All() {
    if (didIt) {
        return;
    }
    didIt = true;
    try {
        // step 0: load the data set; the last attribute is used as the class attribute
        Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName);
        orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1);
        Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1);
        MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString());

        // step 0-1: remove all columns that have more than half of their values missing
        Instances newData = orangeDataSet;
        RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues();
        removeMissingValuesColumns.setM_maxMissingPercentage(50);
        removeMissingValuesColumns.setManualDeleteColumns(columns2Delete);
        removeMissingValuesColumns.setInputFormat(newData);
        newData = Filter.useFilter(newData, removeMissingValuesColumns);
        Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n"
                + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) {
            writer.write(newData.toString());
        }

        // step 0-2: delete the rows in which at least half of the values are missing.
        // The file written above is re-read as plain text and '?' characters are counted per line.
        // NOTE(review): every line of the file goes through the same check, not only data rows,
        // and a literal '?' inside a value would be counted as missing -- confirm this is acceptable.
        int columnNum = newData.numAttributes();
        int totalInstanceNum = newData.numInstances();
        int deleteM1Num = 0;
        int delete1Num = 0;
        // try-with-resources so both streams are released even if reading or writing fails midway
        try (BufferedReader reader70 = new BufferedReader(new InputStreamReader(
                new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")));
                BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff")))) {
            String line;
            while ((line = reader70.readLine()) != null) {
                int missingColumnNum = 0;
                for (int i = 0; i < line.length(); i++) {
                    if (line.charAt(i) == '?') {
                        missingColumnNum++;
                    }
                }
                if (missingColumnNum * 100 / columnNum < 50) {
                    writerAfterDeleteRows.write(line);
                    writerAfterDeleteRows.newLine();
                } else {
                    System.out.println("Delete Row: [" + line + "]");
                    // dropped lines are tallied by their textual suffix: "-1" versus anything else
                    if (line.endsWith("-1")) {
                        deleteM1Num++;
                    } else {
                        delete1Num++;
                    }
                }
            }
            System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num
                    + ", delete class 1:  " + delete1Num);
        }

        // create sample files
        createSampleDataSets();

    } catch (Exception e) {
        Main.logging(null, e);
    }
}

From source file:utils.DataIOUtils.java

License:Open Source License

/**
 * Writes a dataset to the given writer: an {@code @relation} line, one line per attribute
 * (the attribute's {@code toString()}), an {@code @data} line, and one line per instance.
 *
 * <p>The relation name is wrapped in single quotes when it contains {@code '-'} or {@code ':'};
 * otherwise it is written as is.
 *
 * @param wr PrintWriter
 * @param dataset Dataset
 * @param relationName Name of the relation
 */
public static void saveDataset(PrintWriter wr, MultiLabelInstances dataset, String relationName) {
    final String eol = System.getProperty("line.separator");

    // Header line: quote the name only when it contains '-' or ':'.
    final boolean quoted = relationName.contains("-") || relationName.contains(":");
    final String relationLine = quoted
            ? "@relation " + "\'" + relationName + "\'"
            : "@relation " + relationName;
    wr.write(relationLine);
    wr.write(eol);

    // Attribute declarations, one per line.
    final Instances data = dataset.getDataSet();
    for (int attIdx = 0; attIdx < data.numAttributes(); attIdx++) {
        wr.write(data.attribute(attIdx).toString());
        wr.write(eol);
    }

    // Data section, one instance per line.
    wr.write("@data");
    wr.write(eol);
    for (int row = 0; row < dataset.getNumInstances(); row++) {
        wr.write(dataset.getDataSet().get(row).toString());
        wr.write(eol);
    }
}

From source file:utils.DataIOUtils.java

License:Open Source License

/**
 * Writes a multi-label multi-view dataset to the given writer: an {@code @relation} line,
 * one line per attribute (the attribute's {@code toString()}), an {@code @data} line, and
 * one line per instance.
 *
 * <p>The relation line always has the form {@code @relation '<relationName> <views>'}.
 *
 * @param wr PrintWriter
 * @param dataset Dataset
 * @param relationName Name of the relation
 * @param views String with views intervals
 */
public static void saveDatasetMV(PrintWriter wr, MultiLabelInstances dataset, String relationName,
        String views) {
    final String eol = System.getProperty("line.separator");

    // Header line: relation name and views, single-quoted together.
    wr.write("@relation " + "\'" + relationName + " " + views + "\'");
    wr.write(eol);

    // Attribute declarations, one per line.
    final Instances data = dataset.getDataSet();
    for (int attIdx = 0; attIdx < data.numAttributes(); attIdx++) {
        wr.write(data.attribute(attIdx).toString());
        wr.write(eol);
    }

    // Data section, one instance per line.
    wr.write("@data");
    wr.write(eol);
    for (int row = 0; row < dataset.getNumInstances(); row++) {
        wr.write(dataset.getDataSet().get(row).toString());
        wr.write(eol);
    }
}

From source file:utils.DataIOUtils.java

License:Open Source License

/**
 * Writes a dataset to the given writer with a {@code -C} option embedded in the relation name,
 * followed by one line per attribute, an {@code @data} line, and one line per instance.
 *
 * <p>The {@code -C} value is derived from the label positions:
 * <ul>
 *   <li>no label index is below the largest feature index: {@code -C -<labelCount>};</li>
 *   <li>otherwise, no label index is above the smallest feature index: {@code -C <labelCount>};</li>
 *   <li>otherwise an error dialog ("Cannot save as meka.") is shown and nothing is written.</li>
 * </ul>
 *
 * <p>If {@code relationName} already contains {@code "-C"} it is written unchanged (quoted).
 * If it contains {@code "-V:"}, the {@code -C} option is inserted before the {@code -V:} part.
 *
 * @param wr PrintWriter
 * @param dataset Dataset
 * @param relationName Name of the relation
 */
public static void saveMekaDataset(PrintWriter wr, MultiLabelInstances dataset, String relationName) {
    int maxAttIndex;
    int minAttIndex;

    String c;
    c = "-C ";

    int[] attIndex = dataset.getFeatureIndices();

    maxAttIndex = getMax(attIndex);
    minAttIndex = getMin(attIndex);

    int[] labelIndices = dataset.getLabelIndices();

    boolean areLabelMaxIndices = true;
    boolean areLabelMinIndices = false;

    // Labels count as "at the end" unless some label index is below the largest feature index.
    for (int i = 0; i < labelIndices.length && areLabelMaxIndices; i++) {
        if (labelIndices[i] < maxAttIndex) {
            areLabelMaxIndices = false;
        }
    }

    // Otherwise they count as "at the start" unless some label index is above the smallest
    // feature index.
    if (!areLabelMaxIndices) {
        areLabelMinIndices = true;
        for (int i = 0; i < labelIndices.length && areLabelMinIndices; i++) {
            if (labelIndices[i] > minAttIndex) {
                areLabelMinIndices = false;
            }
        }
    }

    if ((!areLabelMaxIndices) && (!areLabelMinIndices)) {
        // Labels are interleaved with features: report it and write nothing.
        JOptionPane.showMessageDialog(null, "Cannot save as meka.", "alert", JOptionPane.ERROR_MESSAGE);
        return;
    } else if (areLabelMaxIndices) {
        c = c + "-" + labelIndices.length;
    } else {
        c = c + labelIndices.length;
    }

    if (relationName.contains("-C")) {
        wr.write("@relation " + "\'" + relationName + "\'");
    } else if (relationName.contains("-V:")) {
        // Limit -1 keeps trailing empty strings, so a name that ends with "-V:" yields an
        // empty second part instead of an ArrayIndexOutOfBoundsException. For every other
        // input the first two parts are the same as with the default limit.
        String[] parts = relationName.split("-V:", -1);
        wr.write("@relation " + "\'" + parts[0] + ": " + c + " -V:" + parts[1] + "\'");
    } else {
        wr.write("@relation " + "\'" + relationName + ": " + c + "\'");
    }

    wr.write(System.getProperty("line.separator"));

    Instances instances = dataset.getDataSet();

    // Attribute declarations, one per line.
    Attribute att;
    for (int i = 0; i < instances.numAttributes(); i++) {
        att = instances.attribute(i);
        wr.write(att.toString());
        wr.write(System.getProperty("line.separator"));
    }

    String current;

    // Data section, one instance per line.
    wr.write("@data");
    wr.write(System.getProperty("line.separator"));
    for (int i = 0; i < dataset.getNumInstances(); i++) {
        current = dataset.getDataSet().get(i).toString();
        wr.write(current);
        wr.write(System.getProperty("line.separator"));
    }
}

From source file:utils.DataIOUtils.java

License:Open Source License

/**
 * Save multi-view multi-label meka dataset
 * /* www .j  av a  2s  .  co  m*/
 * @param wr PrintWriter
 * @param dataset Dataset
 * @param relationName Name of the relation
 * @param views String with views intervals
 */
public static void saveMVMekaDataset(PrintWriter wr, MultiLabelInstances dataset, String relationName,
        String views) {
    int maxAttIndex;
    int minAttIndex;

    String c;
    c = "-C ";

    int[] attIndex = dataset.getFeatureIndices();

    maxAttIndex = getMax(attIndex);
    minAttIndex = getMin(attIndex);

    int[] labelIndices = dataset.getLabelIndices();

    boolean areLabelMaxIndices = true;
    boolean areLabelMinIndices = false;

    for (int i = 0; i < labelIndices.length && areLabelMaxIndices; i++) {
        if (labelIndices[i] < maxAttIndex) {
            areLabelMaxIndices = false;
        }
    }

    if (!areLabelMaxIndices) {
        areLabelMinIndices = true;
        for (int i = 0; i < labelIndices.length && areLabelMinIndices; i++) {
            if (labelIndices[i] > minAttIndex) {
                areLabelMinIndices = false;
            }
        }
    }

    if ((!areLabelMaxIndices) && (!areLabelMinIndices)) {
        JOptionPane.showMessageDialog(null, "Cannot save as meka.", "alert", JOptionPane.ERROR_MESSAGE);
        return;
    } else if (areLabelMaxIndices) {
        c = c + "-" + labelIndices.length;
    } else {
        c = c + labelIndices.length;
    }

    wr.write("@relation " + "\'" + relationName + ": " + c + " " + views + "\'");
    wr.write(System.getProperty("line.separator"));

    Instances instances = dataset.getDataSet();

    Attribute att;
    for (int i = 0; i < instances.numAttributes(); i++) {
        att = instances.attribute(i);
        wr.write(att.toString());
        wr.write(System.getProperty("line.separator"));
    }

    String current;

    wr.write("@data");
    wr.write(System.getProperty("line.separator"));
    for (int i = 0; i < dataset.getNumInstances(); i++) {
        current = dataset.getDataSet().get(i).toString();
        wr.write(current);
        wr.write(System.getProperty("line.separator"));
    }
}