Usage examples for weka.core.Attribute#toString()
@Override public final String toString()
From source file:mao.datamining.DataSetPair.java
private void doItOnce4All() { if (didIt)/* w w w . j a v a 2 s.c o m*/ return; didIt = true; try { //step 0, remove all those empty columns, which has more than 50% missing values Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName); orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1); Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1); MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString()); //step 0-1, to remove all columns which has more than half missing values Instances newData = orangeDataSet; RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues(); removeMissingValuesColumns.setM_maxMissingPercentage(50); removeMissingValuesColumns.setManualDeleteColumns(columns2Delete); removeMissingValuesColumns.setInputFormat(newData); newData = Filter.useFilter(newData, removeMissingValuesColumns); Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n" + newData.toSummaryString()); try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) { writer.write(newData.toString()); } //step 0-2 to transform those numeric columns to Nominal //to delete those instances with more than half missing values BufferedReader reader70 = new BufferedReader(new InputStreamReader( new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff"))); BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff"))); int columnNum = newData.numAttributes(); int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0; String line = null; int missingColumnNum = 0; while ((line = reader70.readLine()) != null) { missingColumnNum = 0; for (int i = 0; i < line.length(); i++) { if (line.charAt(i) == '?') 
missingColumnNum++; } if (missingColumnNum * 100 / columnNum < 50) { writerAfterDeleteRows.write(line); writerAfterDeleteRows.newLine(); } else { System.out.println("Delete Row: [" + line + "]"); if (line.endsWith("-1")) { deleteM1Num++; } else { delete1Num++; } } } System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num + ", delete class 1: " + delete1Num); reader70.close(); writerAfterDeleteRows.close(); //create sample files: createSampleDataSets(); } catch (Exception e) { Main.logging(null, e); } }
From source file:utils.DataIOUtils.java
License:Open Source License
/** * Save dataset//from w w w.jav a 2 s .com * * @param wr PrintWriter * @param dataset Dataset * @param relationName Name of the relation */ public static void saveDataset(PrintWriter wr, MultiLabelInstances dataset, String relationName) { //relationName = relationName.replaceAll(" ", "_"); if (relationName.contains("-")) { wr.write("@relation " + "\'" + relationName + "\'"); } else if (relationName.contains(":")) { wr.write("@relation " + "\'" + relationName + "\'"); } else { wr.write("@relation " + relationName); } wr.write(System.getProperty("line.separator")); Instances instances = dataset.getDataSet(); Attribute att; for (int i = 0; i < instances.numAttributes(); i++) { att = instances.attribute(i); wr.write(att.toString()); wr.write(System.getProperty("line.separator")); } String current; wr.write("@data"); wr.write(System.getProperty("line.separator")); for (int i = 0; i < dataset.getNumInstances(); i++) { current = dataset.getDataSet().get(i).toString(); wr.write(current); wr.write(System.getProperty("line.separator")); } }
From source file:utils.DataIOUtils.java
License:Open Source License
/** * Save multi-label multi-view dataset//from ww w . ja v a 2 s .com * * @param wr PrintWriter * @param dataset Dataset * @param relationName Name of the relation * @param views String with views intervals */ public static void saveDatasetMV(PrintWriter wr, MultiLabelInstances dataset, String relationName, String views) { //relationName = relationName.replaceAll(" ", "_"); wr.write("@relation " + "\'" + relationName + " " + views + "\'"); wr.write(System.getProperty("line.separator")); Instances instancias = dataset.getDataSet(); Attribute att; for (int i = 0; i < instancias.numAttributes(); i++) { att = instancias.attribute(i); wr.write(att.toString()); wr.write(System.getProperty("line.separator")); } String current; wr.write("@data"); wr.write(System.getProperty("line.separator")); for (int i = 0; i < dataset.getNumInstances(); i++) { current = dataset.getDataSet().get(i).toString(); wr.write(current); wr.write(System.getProperty("line.separator")); } }
From source file:utils.DataIOUtils.java
License:Open Source License
/**
 * Writes a multi-label dataset as text with a MEKA-style {@code -C} option in the relation name.
 *
 * <p>The {@code -C} option is derived from the position of the label attributes:
 * <ul>
 *   <li>every label index is at or beyond the largest feature index: {@code -C -N};</li>
 *   <li>otherwise, every label index is at or before the smallest feature index: {@code -C N};</li>
 *   <li>otherwise an error dialog is shown and nothing is written.</li>
 * </ul>
 * where {@code N} is the number of labels.
 *
 * <p>Relation header rules:
 * <ul>
 *   <li>a name already containing {@code -C} is written unchanged (quoted);</li>
 *   <li>a name containing {@code -V:} gets the {@code -C} option inserted before the first
 *       {@code -V:} marker; everything after that marker is kept verbatim, including an empty
 *       remainder;</li>
 *   <li>any other name gets {@code ": -C ..."} appended.</li>
 * </ul>
 *
 * @param wr PrintWriter that receives the output
 * @param dataset Dataset to save
 * @param relationName Name of the relation
 */
public static void saveMekaDataset(PrintWriter wr, MultiLabelInstances dataset, String relationName) {
    final String viewsMarker = "-V:";
    final String eol = System.getProperty("line.separator");

    int[] attIndex = dataset.getFeatureIndices();
    int maxAttIndex = getMax(attIndex);
    int minAttIndex = getMin(attIndex);
    int[] labelIndices = dataset.getLabelIndices();

    // Labels sit after the features when no label index is below the largest feature index.
    boolean areLabelMaxIndices = true;
    for (int labelIndex : labelIndices) {
        if (labelIndex < maxAttIndex) {
            areLabelMaxIndices = false;
            break;
        }
    }

    // Only when they are not at the end, check whether the labels all sit before the features.
    boolean areLabelMinIndices = !areLabelMaxIndices;
    if (areLabelMinIndices) {
        for (int labelIndex : labelIndices) {
            if (labelIndex > minAttIndex) {
                areLabelMinIndices = false;
                break;
            }
        }
    }

    if (!areLabelMaxIndices && !areLabelMinIndices) {
        JOptionPane.showMessageDialog(null, "Cannot save as meka.", "alert", JOptionPane.ERROR_MESSAGE);
        return;
    }
    String c = "-C " + (areLabelMaxIndices ? "-" : "") + labelIndices.length;

    String header;
    if (relationName.contains("-C")) {
        header = relationName;
    } else {
        // indexOf/substring instead of split("-V:")[1]: split drops trailing empty strings, so a
        // name ending in "-V:" used to throw ArrayIndexOutOfBoundsException, and a second "-V:"
        // used to silently truncate the rest of the name.
        int viewsAt = relationName.indexOf(viewsMarker);
        if (viewsAt >= 0) {
            String baseName = relationName.substring(0, viewsAt);
            String viewsSpec = relationName.substring(viewsAt + viewsMarker.length());
            header = baseName + ": " + c + " " + viewsMarker + viewsSpec;
        } else {
            header = relationName + ": " + c;
        }
    }
    wr.write("@relation '" + header + "'");
    wr.write(eol);

    // Attribute declarations, one per line.
    Instances instances = dataset.getDataSet();
    for (int i = 0; i < instances.numAttributes(); i++) {
        Attribute att = instances.attribute(i);
        wr.write(att.toString());
        wr.write(eol);
    }

    // Data section, one instance per line.
    wr.write("@data");
    wr.write(eol);
    for (int i = 0; i < dataset.getNumInstances(); i++) {
        wr.write(dataset.getDataSet().get(i).toString());
        wr.write(eol);
    }
}
From source file:utils.DataIOUtils.java
License:Open Source License
/** * Save multi-view multi-label meka dataset * /* www .j av a 2s . co m*/ * @param wr PrintWriter * @param dataset Dataset * @param relationName Name of the relation * @param views String with views intervals */ public static void saveMVMekaDataset(PrintWriter wr, MultiLabelInstances dataset, String relationName, String views) { int maxAttIndex; int minAttIndex; String c; c = "-C "; int[] attIndex = dataset.getFeatureIndices(); maxAttIndex = getMax(attIndex); minAttIndex = getMin(attIndex); int[] labelIndices = dataset.getLabelIndices(); boolean areLabelMaxIndices = true; boolean areLabelMinIndices = false; for (int i = 0; i < labelIndices.length && areLabelMaxIndices; i++) { if (labelIndices[i] < maxAttIndex) { areLabelMaxIndices = false; } } if (!areLabelMaxIndices) { areLabelMinIndices = true; for (int i = 0; i < labelIndices.length && areLabelMinIndices; i++) { if (labelIndices[i] > minAttIndex) { areLabelMinIndices = false; } } } if ((!areLabelMaxIndices) && (!areLabelMinIndices)) { JOptionPane.showMessageDialog(null, "Cannot save as meka.", "alert", JOptionPane.ERROR_MESSAGE); return; } else if (areLabelMaxIndices) { c = c + "-" + labelIndices.length; } else { c = c + labelIndices.length; } wr.write("@relation " + "\'" + relationName + ": " + c + " " + views + "\'"); wr.write(System.getProperty("line.separator")); Instances instances = dataset.getDataSet(); Attribute att; for (int i = 0; i < instances.numAttributes(); i++) { att = instances.attribute(i); wr.write(att.toString()); wr.write(System.getProperty("line.separator")); } String current; wr.write("@data"); wr.write(System.getProperty("line.separator")); for (int i = 0; i < dataset.getNumInstances(); i++) { current = dataset.getDataSet().get(i).toString(); wr.write(current); wr.write(System.getProperty("line.separator")); } }