List of usage examples for weka.core SparseInstance toString
@Override
public String toString()
From source file:mulan.data.ConverterLibSVM.java
License:Open Source License
/** * Converts a multi-label dataset from LibSVM format to the format * that is compatible with Mulan. It constructs one ARFF and one XML file. * * @param path the directory that contains the source file and will contain * the target files/*from ww w .ja v a 2 s. c o m*/ * @param sourceFilename the name of the source file * @param relationName the relation name of the arff file that will be * constructed * @param targetFilestem the filestem for the target files (.arff and .xml) */ public static void convertFromLibSVM(String path, String sourceFilename, String targetFilestem, String relationName) { BufferedReader aReader = null; BufferedWriter aWriter = null; int numLabels = 0; int numAttributes = 0; int numInstances = 0; double meanParsedAttributes = 0; // Calculate number of labels and attributes String Line = null; try { aReader = new BufferedReader(new FileReader(path + sourceFilename)); while ((Line = aReader.readLine()) != null) { numInstances++; StringTokenizer strTok = new StringTokenizer(Line, " "); while (strTok.hasMoreTokens()) { String token = strTok.nextToken(); if (token.indexOf(":") == -1) { // parse label info StringTokenizer labelTok = new StringTokenizer(token, ","); while (labelTok.hasMoreTokens()) { String strLabel = labelTok.nextToken(); int intLabel = Integer.parseInt(strLabel); if (intLabel > numLabels) { numLabels = intLabel; } } } else { // parse attribute info meanParsedAttributes++; StringTokenizer attrTok = new StringTokenizer(token, ":"); String strAttrIndex = attrTok.nextToken(); int intAttrIndex = Integer.parseInt(strAttrIndex); if (intAttrIndex > numAttributes) { numAttributes = intAttrIndex; } } } } numLabels++; System.out.println("Number of attributes: " + numAttributes); System.out.println("Number of instances: " + numInstances); System.out.println("Number of classes: " + numLabels); System.out.println("Constructing XML file... "); LabelsMetaDataImpl meta = new LabelsMetaDataImpl(); for (int label = 0; label < numLabels; label++) { meta.addRootNode(new LabelNodeImpl("Label" + (label + 1))); } String labelsFilePath = path + targetFilestem + ".xml"; try { LabelsBuilder.dumpLabels(meta, labelsFilePath); System.out.println("Done!"); } catch (LabelsBuilderException e) { File labelsFile = new File(labelsFilePath); if (labelsFile.exists()) { labelsFile.delete(); } System.out.println("Construction of labels XML failed!"); } meanParsedAttributes /= numInstances; boolean Sparse = false; if (meanParsedAttributes < numAttributes) { Sparse = true; System.out.println("Dataset is sparse."); } // Define Instances class to hold data ArrayList<Attribute> attInfo = new ArrayList<Attribute>(numAttributes + numLabels); Attribute[] att = new Attribute[numAttributes + numLabels]; for (int i = 0; i < numAttributes; i++) { att[i] = new Attribute("Att" + (i + 1)); attInfo.add(att[i]); } ArrayList<String> ClassValues = new ArrayList<String>(2); ClassValues.add("0"); ClassValues.add("1"); for (int i = 0; i < numLabels; i++) { att[numAttributes + i] = new Attribute("Label" + (i + 1), ClassValues); attInfo.add(att[numAttributes + i]); } // Re-read file and convert into multi-label arff int countInstances = 0; aWriter = new BufferedWriter(new FileWriter(path + targetFilestem + ".arff")); Instances data = new Instances(relationName, attInfo, 0); aWriter.write(data.toString()); aReader = new BufferedReader(new FileReader(path + sourceFilename)); while ((Line = aReader.readLine()) != null) { countInstances++; // set all values to 0 double[] attValues = new double[numAttributes + numLabels]; Arrays.fill(attValues, 0); Instance tempInstance = new DenseInstance(1, attValues); tempInstance.setDataset(data); // separate class info from attribute info // ensure class info exists StringTokenizer strTok = new StringTokenizer(Line, " "); while (strTok.hasMoreTokens()) { String token = strTok.nextToken(); if (token.indexOf(":") == -1) { // parse label info StringTokenizer labelTok = new StringTokenizer(token, ","); while (labelTok.hasMoreTokens()) { String strLabel = labelTok.nextToken(); int intLabel = Integer.parseInt(strLabel); tempInstance.setValue(numAttributes + intLabel, 1); } } else { // parse attribute info StringTokenizer AttrTok = new StringTokenizer(token, ":"); String strAttrIndex = AttrTok.nextToken(); String strAttrValue = AttrTok.nextToken(); tempInstance.setValue(Integer.parseInt(strAttrIndex) - 1, Double.parseDouble(strAttrValue)); } } if (Sparse) { SparseInstance tempSparseInstance = new SparseInstance(tempInstance); aWriter.write(tempSparseInstance.toString() + "\n"); } else { aWriter.write(tempInstance.toString() + "\n"); } } } catch (IOException e) { e.printStackTrace(); } finally { try { if (aReader != null) { aReader.close(); } if (aWriter != null) { aWriter.close(); } } catch (IOException ex) { ex.printStackTrace(); } } }