parkinsonpredictor.ParkinsonPredictor.java Source code

Introduction

Here is the source code for parkinsonpredictor.ParkinsonPredictor.java
Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package parkinsonpredictor;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.PrincipalComponents;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Small batch pipeline that prepares Parkinson data for LIBSVM: optional CSV
 * conversion, principal component analysis with Weka, conversion of the PCA
 * output to the LIBSVM text format, scaling with {@code svm-scale} and
 * cross-validated training with {@code svm-train}.
 *
 * <p>All file names and executable paths are hard-coded and relative to the
 * working directory; the executable paths use Windows separators.
 *
 * @author Pierre
 */
public class ParkinsonPredictor {

    private static final Logger LOGGER = Logger.getLogger(ParkinsonPredictor.class.getName());

    /** Charset used for every file this class writes (and for reading them back). */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Separator between values in the CSV input and in Weka's sparse data lines. */
    private static final String CSV_DELIMITER = ",";

    /** LIBSVM-formatted data file produced by the parsing steps. */
    private static final String LIBSVM_DATA_FILE = "parkinsonData.txt";

    /** Output of svm-scale, input of svm-train. */
    private static final String SCALED_DATA_FILE = "parkinsonDataScaled.txt";

    /** Column of the score in the CSV file. */
    private static final int SCORE_INDEX = 5;

    /** Column of the first marker in the CSV file. */
    private static final int FIRST_MARKER_INDEX = 6;

    /** Number of consecutive marker columns in the CSV file. */
    private static final int NUMBER_OF_MARKERS = 16;

    /**
     * Number of leading values read from each PCA data line; the value right
     * after them (index 5) is used as the label.
     */
    private static final int PCA_COMPONENT_COUNT = 5;

    /**
     * Runs the pipeline: PCA, conversion to LIBSVM format, scaling, training.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        //prepareData();
        doPCA();
        parseDataAfterPCA();
        scaleData();
        trainData();
    }

    /**
     * Prepare the data from the CSV file to make it readable for LIBSVM.
     * First version without PCA.
     *
     * <p>The first line (header) is skipped. Each remaining non-blank line
     * becomes {@code <label> 1:<v1> ... 16:<v16>}, where the label is the
     * score column truncated toward zero and the values are the 16 marker
     * columns. The result is written to {@code parkinsonData.txt}.
     * I/O errors are logged, not thrown.
     */
    public static void prepareData() {
        final String fileToParse = "../parkinsons_updrs.csv"; // file path
        List<String> linesToWrite = new ArrayList<>();

        // try-with-resources closes the reader on every path, including failures.
        try (BufferedReader br = new BufferedReader(new FileReader(fileToParse))) {
            boolean headerDone = false; // becomes true once the first line has been consumed
            String currentLine;
            while ((currentLine = br.readLine()) != null) {
                if (!headerDone) {
                    headerDone = true;
                    continue;
                }
                if (currentLine.trim().isEmpty()) {
                    continue; // tolerate blank lines instead of failing on a missing column
                }
                linesToWrite.add(csvLineToLibsvm(currentLine));
            }
            writeLines(LIBSVM_DATA_FILE, linesToWrite);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Unable to convert " + fileToParse, e);
        }
    }

    /**
     * Parse data after PCA in order to get the right format for LIBSVM.
     *
     * <p>Reads {@code parkinsonDataPCA.txt}, ignores everything up to and
     * including the line containing {@code @data}, converts each following
     * non-blank line and writes the result to {@code parkinsonData.txt}.
     * Failures (I/O errors or malformed lines) are logged, not thrown.
     */
    public static void parseDataAfterPCA() {
        final String fileToParse = ".\\parkinsonDataPCA.txt"; // file path
        List<String> linesToWrite = new ArrayList<>();

        // The file is written as UTF-8 by doPCA(), so it is read back with the same charset.
        try (BufferedReader br = Files.newBufferedReader(Paths.get(fileToParse), UTF_8)) {
            boolean headerDone = false; // becomes true once the "@data" marker has been seen
            String currentLine;
            while ((currentLine = br.readLine()) != null) {
                if (!headerDone) {
                    // Test the line that was just read; reading another line here
                    // would skip every other header line and could miss the marker.
                    if (currentLine.contains("@data")) {
                        headerDone = true;
                    }
                    continue;
                }
                if (currentLine.trim().isEmpty()) {
                    continue;
                }
                linesToWrite.add(sparseLineToLibsvm(currentLine));
            }
            if (!headerDone) {
                LOGGER.log(Level.WARNING, "No @data section found in {0}", fileToParse);
            }
            writeLines(LIBSVM_DATA_FILE, linesToWrite);
        } catch (IOException | RuntimeException e) {
            LOGGER.log(Level.SEVERE, "Unable to convert " + fileToParse, e);
        }
    }

    /**
     * Do principal component analysis.
     *
     * <p>Loads {@code parkinsonDataTruncate.libsvm} through Weka, applies the
     * {@link PrincipalComponents} filter and writes the textual form of the
     * filtered data set to {@code parkinsonDataPCA.txt}. Failures are logged,
     * not thrown.
     */
    public static void doPCA() {
        try {
            DataSource source = new DataSource(".\\parkinsonDataTruncate.libsvm");
            Instances data = source.getDataSet();

            PrincipalComponents pc = new PrincipalComponents();
            // Options first, setInputFormat last: Weka expects the input format
            // to be set after the filter has been fully configured.
            pc.setMaximumAttributes(100);
            pc.setInputFormat(data);
            Instances newData = Filter.useFilter(data, pc);

            writeLines("parkinsonDataPCA.txt", Arrays.asList(newData.toString()));
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Principal component analysis failed", e);
        }
    }

    /**
     * Scale data between -1 and 1 to be more accurate.
     *
     * <p>Runs {@code svm-scale} on {@code parkinsonData.txt}, stores its
     * standard output in {@code parkinsonDataScaled.txt} and waits for the
     * process to finish. Failures are logged, not thrown.
     */
    public static void scaleData() {
        runAndCapture(
                Arrays.asList(".\\libsvm-3.21\\windows\\svm-scale.exe", LIBSVM_DATA_FILE),
                SCALED_DATA_FILE,
                false);
    }

    /**
     * Train the svm with the data.
     *
     * <p>Runs {@code svm-train} on {@code parkinsonDataScaled.txt}, appends its
     * standard output to the file named {@code 20%} and waits for the process
     * to finish. Failures are logged, not thrown.
     */
    public static void trainData() {
        /*
        -s : svm type (4, see the LIBSVM README for the list of types)
        -v : cross validation mode (number of folds)
        -t : kernel type : 1 polynomial (gamma*u'*v + coef0)^degree
        -d : degree
        -r : coef0
        */
        runAndCapture(
                Arrays.asList(".\\libsvm-3.21\\windows\\svm-train.exe",
                        "-s", "4", "-v", "8", "-t", "1", "-d", "9", "-r", "2",
                        SCALED_DATA_FILE),
                "20%",
                true);
    }

    /** Converts one CSV data line to {@code <label> 1:<v1> ... 16:<v16>}. */
    private static String csvLineToLibsvm(String csvLine) {
        String[] data = csvLine.split(CSV_DELIMITER);
        // Keep only the integer part of the score; the cast also copes with
        // float renderings that use scientific notation.
        int label = (int) Float.parseFloat(data[SCORE_INDEX]);
        StringBuilder line = new StringBuilder(String.valueOf(label));
        for (int i = 0; i < NUMBER_OF_MARKERS; i++) {
            // Template: <Label> <index1>:<value1> <index2>:<value2> ...
            line.append(' ').append(i + 1).append(':').append(data[FIRST_MARKER_INDEX + i]);
        }
        return line.toString();
    }

    /**
     * Converts one data line of the PCA output to the LIBSVM format.
     * Each comma-separated entry is expected to look like {@code <index> <value>}
     * (braces around the line are dropped); the entry at index 5 provides the label.
     */
    private static String sparseLineToLibsvm(String sparseLine) {
        String[] data = sparseLine.replace("{", "").replace("}", "").split(CSV_DELIMITER);
        StringBuilder line = new StringBuilder(data[PCA_COMPONENT_COUNT].split(" ")[1]);
        for (int i = 0; i < PCA_COMPONENT_COUNT; i++) {
            line.append(' ').append(i + 1).append(':').append(data[i].split(" ")[1]);
        }
        return line.toString();
    }

    /** Writes the given lines to the named file as UTF-8, replacing existing content. */
    private static void writeLines(String fileName, List<String> lines) throws IOException {
        Path target = Paths.get(fileName);
        Files.write(target, lines, UTF_8);
    }

    /**
     * Runs an external command, sends its standard output to a file and waits
     * for it to exit. {@code Runtime.exec} does not go through a shell, so
     * {@code >} / {@code >>} in a command string are not interpreted; the
     * redirection is therefore configured on the process itself.
     */
    private static void runAndCapture(List<String> command, String outputFile, boolean append) {
        ProcessBuilder builder = new ProcessBuilder(command);
        builder.redirectOutput(append
                ? ProcessBuilder.Redirect.appendTo(Paths.get(outputFile).toFile())
                : ProcessBuilder.Redirect.to(Paths.get(outputFile).toFile()));
        // Inherit stderr so the child can never block on an unread pipe.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        try {
            int exitCode = builder.start().waitFor();
            if (exitCode != 0) {
                LOGGER.log(Level.WARNING, "{0} exited with code {1}",
                        new Object[]{command.get(0), exitCode});
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt(); // preserve the interrupt for callers
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }
}