Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package parkinsonpredictor; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import weka.core.converters.ConverterUtils.DataSource; import weka.core.Instances; import weka.filters.Filter; import weka.filters.unsupervised.attribute.PrincipalComponents; import java.util.logging.Level; import java.util.logging.Logger; /** * * @author Pierre */ public class ParkinsonPredictor { /** * @param args the command line arguments */ public static void main(String[] args) { //prepareData(); doPCA(); parseDataAfterPCA(); scaleData(); trainData(); } /** * Prepare the data from the file to make it readable for the LIBSVM * First version without PCA */ public static void prepareData() { final String fileToParse = "../parkinsons_updrs.csv"; // file path final String csvDelimiter = ","; // character used to separate each data BufferedReader br = null; try { String sCurrentLine; // will get the first line of file until the end br = new BufferedReader(new FileReader(fileToParse)); boolean headerDone = false; // Know when we've browsed the header (first line of file) final int firstMarkerIndex = 6; // Index of the first marker final int numberOfMarker = 16; // Number of marker in file List<String> linesToWrite = new ArrayList<>(); // Store the line to insert in the new file // Parse the file and format the line while ((sCurrentLine = br.readLine()) != null) { if (headerDone) { String[] data = sCurrentLine.split(csvDelimiter); // Get each element of the line //String totalScore = data[5]; String totalScore = (Float.parseFloat(data[5])) + ""; // Get the score totalScore = totalScore.substring(0, totalScore.indexOf('.')); // Get the int part of the score String readableLine = totalScore; // We start the line by the score for (int i = firstMarkerIndex; i < firstMarkerIndex + numberOfMarker; i++) { // We continue the line by all the marker // We follow the template given : <Label> <index1>:<value1> <index2>:<value2> readableLine += " " + (i - firstMarkerIndex + 1) + ":" + data[i]; } linesToWrite.add(readableLine); // We store the line to insert, and continu to parse the file } else { headerDone = true; } } // Writing the file Path file = Paths.get("parkinsonData.txt"); // Init the readable file Files.write(file, linesToWrite, Charset.forName("UTF-8")); // write content } catch (IOException e) { e.printStackTrace(); } finally { try { if (br != null) { br.close(); } } catch (IOException ex) { ex.printStackTrace(); } } } /** * Parse data after pca in order to get the right format for libsvm */ public static void parseDataAfterPCA() { final String fileToParse = ".\\parkinsonDataPCA.txt"; // file path final String csvDelimiter = ","; // character used to separate each data BufferedReader br = null; try { String sCurrentLine; // will get the first line of file until the end boolean headerDone = false; // Know when we've browsed the header (first line of file) br = new BufferedReader(new FileReader(fileToParse)); List<String> linesToWrite = new ArrayList<>(); // Store the line to insert in the new file // Parse the file and format the line while ((sCurrentLine = br.readLine()) != null) { if (headerDone) { String[] data = sCurrentLine.replace("{", "").replace("}", "").split(csvDelimiter); String readableLine = data[5].split(" ")[1]; for (int i = 0; i < 5; i++) { readableLine += " " + (i + 1) + ":" + data[i].split(" ")[1]; } linesToWrite.add(readableLine); // We store the line to insert, and continu to parse the file } else { if (br.readLine().contains("@data")) { headerDone = true; } } } // Writing the file Path file = Paths.get("parkinsonData.txt"); // Init the readable file Files.write(file, linesToWrite, Charset.forName("UTF-8")); // write content } catch (Exception e) { e.printStackTrace(); } } /** * Do principal component analysis */ public static void doPCA() { PrincipalComponents pc = new PrincipalComponents(); DataSource source; try { source = new DataSource(".\\parkinsonDataTruncate.libsvm"); Instances data = source.getDataSet(); pc.setInputFormat(data); pc.setMaximumAttributes(100); Instances newData = Filter.useFilter(data, pc); Path file = Paths.get("parkinsonDataPCA.txt"); List<String> lines = Arrays.asList(newData.toString()); Files.write(file, lines, Charset.forName("UTF-8")); } catch (Exception e) { e.printStackTrace(); } } /** * Scale data between -1 and 1 to be more accurate */ public static void scaleData() { try { Runtime rt = Runtime.getRuntime(); rt.exec(".\\libsvm-3.21\\windows\\svm-scale.exe parkinsonData.txt > parkinsonDataScaled.txt"); } catch (IOException ex) { Logger.getLogger(ParkinsonPredictor.class.getName()).log(Level.SEVERE, null, ex); } } /** * Train the svm with the data */ public static void trainData() { try { Runtime rt = Runtime.getRuntime(); rt.exec(".\\libsvm-3.21\\windows\\svm-train.exe -s 4 -v 8 -t 1 -d 9 -r 2 parkinsonDataScaled.txt >> 20%"); /* -v : cross validation mode -t : kernel type : 1 polinomial (gamma*u'*v + coef0)^degree -d : degree -r : coef0 */ } catch (IOException ex) { Logger.getLogger(ParkinsonPredictor.class.getName()).log(Level.SEVERE, null, ex); } } }