Example usage for weka.core.converters CSVLoader setFieldSeparator

List of usage examples for weka.core.converters CSVLoader setFieldSeparator

Introduction

On this page you can find example usages of weka.core.converters CSVLoader setFieldSeparator.

Prototype

public void setFieldSeparator(String value) 

Source Link

Document

Sets the character used as column separator.

Usage

From source file:be.uza.keratoconus.analysis.impl.PreTrainedModel.java

License:Open Source License

/**
 * Converts the data of one patient exam into a single Weka instance.
 *
 * <p>For every field reported by {@code classificationModelService.getUsedFields()} the
 * exam data is checked first for the plain field name and, failing that, for the
 * field name suffixed with {@code Face.FRONT}; in the latter case both the FRONT and
 * the BACK column are emitted. The collected columns are rendered as a two-line,
 * semicolon-separated CSV text (header + one data row) with a trailing {@code Class}
 * column whose value is {@code ?}, which is then parsed with {@link CSVLoader}. The
 * first row of the parsed data set is stored in {@code instance}.
 *
 * <p>Any exception raised while parsing is logged as a warning and not rethrown.
 *
 * @param exam the exam whose data is to be converted
 */
@Override
public void processPatientExam(PatientExam exam) {
    examData = exam.getExamData();

    final StringBuilder header = new StringBuilder();
    final StringBuilder row = new StringBuilder();
    int columnCount = 0;

    for (String fieldName : classificationModelService.getUsedFields()) {
        if (examData.containsKey(fieldName)) {
            // TODO fatal error if the value is null? (it is currently written as "null")
            final String value = examData.get(fieldName);
            header.append(fieldName).append(SEMICOLON);
            row.append(value).append(SEMICOLON);
            columnCount++;
            continue;
        }

        final String frontKey = fieldName + " " + Face.FRONT;
        if (!examData.containsKey(frontKey)) {
            // Neither the plain nor the FRONT variant is present: no column is emitted.
            continue;
        }

        // TODO fatal error if either value is null? (it is currently written as "null")
        final String frontValue = examData.get(frontKey);
        header.append(frontKey).append(SEMICOLON);
        row.append(frontValue).append(SEMICOLON);
        columnCount++;

        final String backKey = fieldName + " " + Face.BACK;
        final String backValue = examData.get(backKey);
        header.append(backKey).append(SEMICOLON);
        row.append(backValue).append(SEMICOLON);
        columnCount++;
    }

    // The class column comes last, so its index equals the number of data columns.
    final String csvText = header + "Class\n" + row + "?\n";

    final CSVLoader loader = new CSVLoader();
    loader.setFieldSeparator(SEMICOLON);
    try {
        final byte[] encoded = csvText.getBytes(Charset.forName("windows-1252"));
        loader.setSource(new ByteArrayInputStream(encoded));
        final Instances parsed = loader.getDataSet();
        parsed.setClassIndex(columnCount);
        instance = parsed.get(0);
    } catch (Exception e) {
        logService.log(ownComponentContext.getServiceReference(), LogService.LOG_WARNING,
                "Exception thrown when reading CSV record", e);
    }
}

From source file:eu.linda.analytics.formats.CSVInputFormat.java

/**
 * Loads a comma-separated file into a Weka data set and records the size of the
 * file and the elapsed load time on {@code analytics}.
 *
 * <p>The last attribute of the loaded data is set as the class attribute. All
 * exceptions are logged at SEVERE level and not rethrown.
 *
 * @param pathToFile     path of the CSV file to load
 * @param isForRDFOutput when {@code true}, attributes "1,2" are forced to string type
 * @param analytics      receives the accumulated data size and time-to-get-data values
 * @return the loaded data set, or {@code null} if the loader failed before producing one
 */
@Override
public AbstractList importData4weka(String pathToFile, boolean isForRDFOutput, Analytics analytics) {

    long startTimeToGetQuery = System.currentTimeMillis();
    helpfulFuncions.nicePrintMessage("import CSV file ");

    System.out.println("Import data from file: " + pathToFile);

    Instances data = null;
    try {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(pathToFile));
        if (isForRDFOutput) {
            loader.setStringAttributes("1,2");
        }

        loader.setFieldSeparator(",");
        data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Open the stream before entering try/finally so that a failed open can never
        // lead to close() being called on a null reference (which previously replaced
        // the real error with a NullPointerException).
        FileInputStream fis = new FileInputStream(pathToFile);
        try {
            long fileSize = fis.getChannel().size();
            System.out.println("fis.getChannel().size() " + fileSize);
            analytics.setData_size(analytics.getData_size() + fileSize);
        } finally {
            fis.close();
        }

        // Elapsed time in milliseconds, converted to seconds.
        long elapsedTimeToGetQueryMillis = System.currentTimeMillis() - startTimeToGetQuery;
        float timeToGetQuery = elapsedTimeToGetQueryMillis / 1000F;
        analytics.setTimeToGet_data(analytics.getTimeToGet_data() + timeToGetQuery);
        System.out.println("timeToGetQuery" + timeToGetQuery);

        connectionController.updateLindaAnalyticsInputDataPerformanceTime(analytics);

    } catch (Exception ex) {
        // Log under this class's own name; the logger was previously named after
        // ArffInputFormat, which made failures here look like ARFF import failures.
        Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, ex);
    }
    return data;

}

From source file:eu.linda.analytics.formats.CSVInputFormat.java

/**
 * Manual smoke test: loads a comma-separated file with {@link CSVLoader}, treating
 * attributes "1,2" as strings, and sets the last attribute as the class attribute.
 *
 * <p>Failures are logged at SEVERE level and not rethrown.
 *
 * @param args optional; {@code args[0]} overrides the default input file path
 * @throws Exception declared for compatibility with the original signature
 */
public static void main(String[] args) throws Exception {
    // Default to the original developer path when no file is supplied on the command line.
    String csvPath = "/home/eleni/Desktop/mydatasets/NYRandonResearchTotest2.csv";
    if (args != null && args.length > 0) {
        csvPath = args[0];
    }

    CSVLoader loader = new CSVLoader();
    try {
        loader.setSource(new File(csvPath));

        loader.setStringAttributes("1,2");
        loader.setFieldSeparator(",");

        Instances data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

    } catch (Exception ex) {
        // Covers IOException as well; both were handled identically before.
        Logger.getLogger(CSVInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:eu.linda.analytics.formats.RDFInputFormat.java

/**
 * Downloads the result of a stored query to a temporary file, loads it as
 * comma-separated data into a Weka data set, and records the size of the download
 * and the elapsed time on {@code analytics}.
 *
 * <p>The last attribute of the loaded data is set as the class attribute. All
 * exceptions are logged at SEVERE level and not rethrown. The temporary file is
 * removed before the method returns.
 *
 * @param query_id       identifier used to look up the query URI
 * @param isForRDFOutput when {@code true}, attributes "1,2" are forced to string type
 * @param analytics      receives the accumulated data size and time-to-get-data values
 * @return the loaded data set, or {@code null} if the URL is not responsive or the
 *         loader failed before producing a data set
 */
@Override
public AbstractList importData4weka(String query_id, boolean isForRDFOutput, Analytics analytics) {

    String queryURI = connectionController.getQueryURI(query_id);

    helpfulFunctions.nicePrintMessage("import data from uri " + queryURI);

    Instances data = null;
    File tmpfile4lindaquery = null;
    try {
        long startTimeToGetQuery = System.currentTimeMillis();
        URL url = new URL(queryURI);
        if (!helpfulFunctions.isURLResponsive(url)) {
            return null;
        }
        tmpfile4lindaquery = File.createTempFile("tmpfile4lindaquery" + query_id, ".tmp");
        FileUtils.copyURLToFile(url, tmpfile4lindaquery);

        System.out.println("Downloaded File Query: " + tmpfile4lindaquery);

        CSVLoader loader = new CSVLoader();
        loader.setSource(tmpfile4lindaquery);
        if (isForRDFOutput) {
            loader.setStringAttributes("1,2");
        }

        loader.setFieldSeparator(",");
        data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Open the stream before entering try/finally so that a failed open can never
        // lead to close() being called on a null reference.
        FileInputStream fis = new FileInputStream(tmpfile4lindaquery);
        try {
            long fileSize = fis.getChannel().size();
            System.out.println("fis.getChannel().size() " + fileSize);
            analytics.setData_size(analytics.getData_size() + fileSize);
        } finally {
            fis.close();
        }

        // Elapsed time in milliseconds, converted to seconds.
        long elapsedTimeToGetQueryMillis = System.currentTimeMillis() - startTimeToGetQuery;
        float timeToGetQuery = elapsedTimeToGetQueryMillis / 1000F;
        analytics.setTimeToGet_data(analytics.getTimeToGet_data() + timeToGetQuery);
        System.out.println("timeToGetQuery" + timeToGetQuery);

        connectionController.updateLindaAnalyticsInputDataPerformanceTime(analytics);

    } catch (Exception ex) {
        // Log under this class's own name; the logger was previously named after
        // ArffInputFormat, which made failures here look like ARFF import failures.
        Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, ex);
    } finally {
        // The download is only needed while loading; without this every call leaves
        // another file behind in the temp directory.
        if (tmpfile4lindaquery != null && !tmpfile4lindaquery.delete()) {
            tmpfile4lindaquery.deleteOnExit();
        }
    }
    return data;

}

From source file:sentinets.Prediction.java

License:Open Source License

/**
 * Loads a tab-separated file into {@code original}, derives the filtered working
 * copy {@code unlabled} from it, and sets the last attribute of {@code unlabled}
 * as the class attribute.
 *
 * <p>Which attributes are removed depends on {@code classifierType}: a fixed index
 * list for SENTIMENT, the configured {@code removeAttr} for SENTIMENT_WORD and
 * CUSTOM. For any other type no removal indices are configured on the filter.
 *
 * @param inputFile path of the tab-separated input file
 * @return 0 on success, 1 if the file was not found, 2 on any other I/O error,
 *         3 on any other exception
 */
public int setInstances(String inputFile) {
    String[] nominalVals = { CLASSINDEX + ":" + StringUtils.join(classNames, ",") };
    original = null;
    try {
        System.out.println("[In Prediction] Loading instances. ");
        CSVLoader csvSource = new CSVLoader();
        csvSource.setSource(new File(inputFile));
        csvSource.setFieldSeparator("\t");
        csvSource.setNominalAttributes(CLASSINDEX + "");
        csvSource.setStringAttributes(stringAttr);
        csvSource.setNominalLabelSpecs(nominalVals);
        original = csvSource.getDataSet();
        unlabled = original;
        classProbIndex = original.numAttributes() - 1;

        Remove r = new Remove();
        if (classifierType == MODELTYPE.SENTIMENT) {
            r.setAttributeIndices("3,4,6,8,10-12,14,42,43,45-last");
            System.out.println("Filtering instances for SENTIMENT");
        } else if (classifierType == MODELTYPE.SENTIMENT_WORD || classifierType == MODELTYPE.CUSTOM) {
            // CUSTOM shares the SENTIMENT_WORD path, including its log message.
            r.setAttributeIndices(removeAttr);
            System.out.println("Filtering instances for SENTIMENT WORD");
        }
        r.setInputFormat(unlabled);
        unlabled = Remove.useFilter(unlabled, r);

    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return 1;
    } catch (IOException e) {
        e.printStackTrace();
        return 2;
    } catch (Exception e) {
        e.printStackTrace();
        return 3;
    }
    // Only reached on success, so unlabled holds the freshly filtered data here.
    int cIdx = unlabled.numAttributes() - 1;
    unlabled.setClassIndex(cIdx);
    System.out.println(
            "Class Attribute is: " + unlabled.classAttribute() + " at index: " + unlabled.classIndex());
    return 0;
}

From source file:sentinets.SentiNets.java

License:Open Source License

/**
 * Loads a tab-separated file into {@code original}, derives the filtered working
 * copy {@code unlabled} from it, and sets the last attribute of {@code unlabled}
 * as the class attribute.
 *
 * <p>A fixed set of attributes is always removed; one further attribute is removed
 * when {@code classifierType} is {@code E_P} or {@code S_NS}.
 *
 * <p>Any exception is printed to standard error and not rethrown. In that case the
 * class index is left untouched, because {@code unlabled} may be {@code null},
 * stale, or only partially filtered.
 *
 * @param inputFile path of the tab-separated input file
 */
public void setInstances(String inputFile) {
    String[] nominalVals = { "15:e,p", "16:s,na_ns" };
    original = null;
    try {
        CSVLoader csvSource = new CSVLoader();
        csvSource.setSource(new File(inputFile));
        csvSource.setFieldSeparator("\t");
        csvSource.setNominalAttributes("15-16");
        csvSource.setStringAttributes("3,4,6,8,10-12,14");
        csvSource.setNominalLabelSpecs(nominalVals);
        original = csvSource.getDataSet();
        unlabled = original;

        Remove r = new Remove();
        r.setAttributeIndices("3-4,6,10-12,14");
        r.setInputFormat(unlabled);
        unlabled = Remove.useFilter(unlabled, r);

        // Second pass: drop one more attribute depending on the classifier type.
        String extraIndex = null;
        if (classifierType == E_P) {
            System.out.println("Filtering instances for E_P");
            extraIndex = "9";
        } else if (classifierType == S_NS) {
            System.out.println("Filtering instances for S_NS");
            extraIndex = "8";
        }
        if (extraIndex != null) {
            r = new Remove();
            r.setAttributeIndices(extraIndex);
            r.setInputFormat(unlabled);
            unlabled = Remove.useFilter(unlabled, r);
        }

        // Set the class index only after a successful load and filter. Doing it after
        // the catch dereferenced unlabled even when loading had failed.
        int cIdx = unlabled.numAttributes() - 1;
        unlabled.setClassIndex(cIdx);

    } catch (Exception e) {
        // Covers FileNotFoundException and IOException; all were handled identically.
        e.printStackTrace();
    }
}

From source file:sentinets.TrainModel.java

License:Open Source License

/**
 * Loads a tab-separated training file into {@code ins}, removes a fixed set of
 * attributes, prints a summary of the result, and sets the last remaining
 * attribute as the class attribute.
 *
 * <p>Any exception is printed to standard error and not rethrown. {@code ins} is
 * reset to {@code null} on entry, so it stays {@code null} if the load itself fails.
 *
 * @param inputFile path of the tab-separated input file
 */
public void setInstances(String inputFile) {
    // NOTE(review): the label spec targets attribute 42 while setNominalAttributes
    // declares 15-16 — confirm this combination is intentional.
    String[] nominalVals = { "42:positive,negative,neutral" };
    ins = null;
    try {
        CSVLoader csvSource = new CSVLoader();
        csvSource.setSource(new File(inputFile));
        csvSource.setFieldSeparator("\t");
        csvSource.setNominalAttributes("15-16");
        csvSource.setStringAttributes("3,4,6,8,10-12,14");
        csvSource.setNominalLabelSpecs(nominalVals);
        ins = csvSource.getDataSet();

        Remove r = new Remove();
        r.setAttributeIndices("3-4,6,8,10-12,14,40-41");
        r.setInputFormat(ins);
        ins = Remove.useFilter(ins, r);
        System.out.println(ins.toSummaryString());

        // Set the class index only after a successful load and filter. Doing it after
        // the catch meant a failed load (ins == null) always ended in a
        // NullPointerException.
        int cIdx = ins.numAttributes() - 1;
        ins.setClassIndex(cIdx);

    } catch (Exception e) {
        // Covers FileNotFoundException and IOException; all were handled identically.
        e.printStackTrace();
    }
}