Example usage for weka.core.converters CSVLoader setStringAttributes

List of usage examples for weka.core.converters CSVLoader setStringAttributes

Introduction

On this page you can find example usages of weka.core.converters.CSVLoader.setStringAttributes.

Prototype

public void setStringAttributes(String value) 

Source Link

Document

Sets the attribute range to be forced to type string.

Usage

From source file:eu.linda.analytics.formats.CSVInputFormat.java

/**
 * Loads a comma-separated file into a Weka dataset and records size and timing
 * metrics on {@code analytics}.
 *
 * <p>The last attribute of the loaded dataset is set as the class attribute. Any
 * exception raised while loading or while recording metrics is logged and swallowed.
 *
 * @param pathToFile     path of the CSV file to load
 * @param isForRDFOutput when {@code true}, attributes 1 and 2 are forced to string type
 * @param analytics      receives the accumulated data size and load time
 * @return the loaded dataset; {@code null} if an exception occurred before the
 *         dataset could be read
 */
@Override
public AbstractList importData4weka(String pathToFile, boolean isForRDFOutput, Analytics analytics) {

    long startTimeToGetQuery = System.currentTimeMillis();
    helpfulFuncions.nicePrintMessage("import CSV file ");

    System.out.println("Import data from file: " + pathToFile);

    Instances data = null;
    try {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(pathToFile));
        if (isForRDFOutput) {
            loader.setStringAttributes("1,2");
        }

        loader.setFieldSeparator(",");
        data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // The stream is opened outside the inner try so that a failed open cannot
        // reach the finally block with a null reference (which would replace the
        // real exception with a NullPointerException).
        FileInputStream fis = new FileInputStream(pathToFile);
        try {
            long fileSize = fis.getChannel().size();
            System.out.println("fis.getChannel().size() " + fileSize);
            analytics.setData_size(analytics.getData_size() + fileSize);
        } finally {
            fis.close();
        }

        // Elapsed wall-clock time, converted from milliseconds to seconds.
        long elapsedTimeToGetQueryMillis = System.currentTimeMillis() - startTimeToGetQuery;
        float timeToGetQuery = elapsedTimeToGetQueryMillis / 1000F;
        analytics.setTimeToGet_data(analytics.getTimeToGet_data() + timeToGetQuery);
        System.out.println("timeToGetQuery" + timeToGetQuery);

        connectionController.updateLindaAnalyticsInputDataPerformanceTime(analytics);

    } catch (Exception ex) {
        // Log under this class's own name rather than an unrelated input format.
        Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, ex);
    }
    return data;

}

From source file:eu.linda.analytics.formats.CSVInputFormat.java

/**
 * Manual smoke test: loads a CSV file with attributes 1 and 2 forced to string
 * type and sets the last attribute as the class attribute.
 *
 * @param args optional; {@code args[0]} is the CSV path. When absent, the original
 *             hard-coded developer path is used.
 * @throws Exception declared for compatibility; load failures are logged, not thrown
 */
public static void main(String[] args) throws Exception {
    String csvPath = (args != null && args.length > 0)
            ? args[0]
            : "/home/eleni/Desktop/mydatasets/NYRandonResearchTotest2.csv";

    CSVLoader loader = new CSVLoader();
    try {
        loader.setSource(new File(csvPath));

        loader.setStringAttributes("1,2");
        loader.setFieldSeparator(",");

        Instances data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

    } catch (Exception ex) {
        // A single handler suffices: IOException was previously caught separately
        // but handled identically.
        Logger.getLogger(CSVInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:eu.linda.analytics.formats.RDFInputFormat.java

/**
 * Downloads the result of a stored query to a temporary file, loads it as
 * comma-separated data into a Weka dataset, and records size and timing metrics
 * on {@code analytics}.
 *
 * <p>The last attribute of the loaded dataset is set as the class attribute. The
 * temporary download is removed before returning. Any exception raised along the
 * way is logged and swallowed.
 *
 * @param query_id       identifier used to look up the query URI
 * @param isForRDFOutput when {@code true}, attributes 1 and 2 are forced to string type
 * @param analytics      receives the accumulated data size and load time
 * @return the loaded dataset; {@code null} if the URL is not responsive or an
 *         exception occurred before the dataset could be read
 */
@Override
public AbstractList importData4weka(String query_id, boolean isForRDFOutput, Analytics analytics) {

    String queryURI = connectionController.getQueryURI(query_id);

    helpfulFunctions.nicePrintMessage("import data from uri " + queryURI);

    Instances data = null;
    File tmpfile4lindaquery = null;
    try {
        long startTimeToGetQuery = System.currentTimeMillis();
        URL url = new URL(queryURI);
        if (!helpfulFunctions.isURLResponsive(url)) {
            return null;
        }
        tmpfile4lindaquery = File.createTempFile("tmpfile4lindaquery" + query_id, ".tmp");
        FileUtils.copyURLToFile(url, tmpfile4lindaquery);

        System.out.println("Downloaded File Query: " + tmpfile4lindaquery);

        CSVLoader loader = new CSVLoader();
        loader.setSource(tmpfile4lindaquery);
        if (isForRDFOutput) {
            loader.setStringAttributes("1,2");
        }

        loader.setFieldSeparator(",");
        data = loader.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // The stream is opened outside the inner try so that a failed open cannot
        // reach the finally block with a null reference.
        FileInputStream fis = new FileInputStream(tmpfile4lindaquery);
        try {
            long fileSize = fis.getChannel().size();
            System.out.println("fis.getChannel().size() " + fileSize);
            analytics.setData_size(analytics.getData_size() + fileSize);
        } finally {
            fis.close();
        }

        // Elapsed wall-clock time, converted from milliseconds to seconds.
        long elapsedTimeToGetQueryMillis = System.currentTimeMillis() - startTimeToGetQuery;
        float timeToGetQuery = elapsedTimeToGetQueryMillis / 1000F;
        analytics.setTimeToGet_data(analytics.getTimeToGet_data() + timeToGetQuery);
        System.out.println("timeToGetQuery" + timeToGetQuery);

        connectionController.updateLindaAnalyticsInputDataPerformanceTime(analytics);

    } catch (Exception ex) {
        // Log under this class's own name rather than an unrelated input format.
        Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, ex);
    } finally {
        // The dataset is already in memory at this point, so the download is no
        // longer needed; without this every call leaves a file in the temp directory.
        if (tmpfile4lindaquery != null && !tmpfile4lindaquery.delete()) {
            tmpfile4lindaquery.deleteOnExit();
        }
    }
    return data;

}

From source file:sentinets.Prediction.java

License:Open Source License

/**
 * Loads a tab-separated file into {@code original}, derives the filtered
 * {@code unlabled} dataset from it, and sets the last remaining attribute as the
 * class attribute.
 *
 * <p>Which attributes are removed depends on {@code classifierType}: a fixed index
 * list for {@code SENTIMENT}, and {@code removeAttr} for {@code SENTIMENT_WORD} and
 * {@code CUSTOM}. For any other type the Remove filter is applied with no indices set.
 *
 * @param inputFile path of the tab-separated input file
 * @return 0 on success, 1 if the file was not found, 2 on another I/O error,
 *         3 on any other exception
 */
public int setInstances(String inputFile) {
    String[] nominalVals = { CLASSINDEX + ":" + StringUtils.join(classNames, ",") };
    original = null;
    try {
        System.out.println("[In Prediction] Loading instances. ");
        CSVLoader csvSource = new CSVLoader();
        csvSource.setSource(new File(inputFile));
        csvSource.setFieldSeparator("\t");
        csvSource.setNominalAttributes(CLASSINDEX + "");
        csvSource.setStringAttributes(stringAttr);
        csvSource.setNominalLabelSpecs(nominalVals);
        original = csvSource.getDataSet();
        unlabled = original;
        // Index of the last attribute of the unfiltered data.
        classProbIndex = original.numAttributes() - 1;

        Remove r = new Remove();
        if (classifierType == MODELTYPE.SENTIMENT) {
            r.setAttributeIndices("3,4,6,8,10-12,14,42,43,45-last");
            System.out.println("Filtering instances for SENTIMENT");
        } else if (classifierType == MODELTYPE.SENTIMENT_WORD || classifierType == MODELTYPE.CUSTOM) {
            r.setAttributeIndices(removeAttr);
            System.out.println("Filtering instances for SENTIMENT WORD");
        }
        r.setInputFormat(unlabled);
        unlabled = Remove.useFilter(unlabled, r);

    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return 1;
    } catch (IOException e) {
        e.printStackTrace();
        return 2;
    } catch (Exception e) {
        e.printStackTrace();
        return 3;
    }
    // Only reached when loading and filtering succeeded.
    int cIdx = unlabled.numAttributes() - 1;
    unlabled.setClassIndex(cIdx);
    System.out.println(
            "Class Attribute is: " + unlabled.classAttribute() + " at index: " + unlabled.classIndex());
    return 0;
}

From source file:sentinets.SentiNets.java

License:Open Source License

/**
 * Loads a tab-separated file into {@code original}, derives the filtered
 * {@code unlabled} dataset from it, and sets the last remaining attribute as the
 * class attribute.
 *
 * <p>A fixed set of attributes is always removed; one further attribute is removed
 * depending on whether {@code classifierType} is {@code E_P} or {@code S_NS}.
 * Exceptions are printed and swallowed; in that case the class index is only set
 * if an {@code unlabled} dataset is available.
 *
 * @param inputFile path of the tab-separated input file
 */
public void setInstances(String inputFile) {
    String[] nominalVals = { "15:e,p", "16:s,na_ns" };
    original = null;
    try {
        CSVLoader csvSource = new CSVLoader();
        csvSource.setSource(new File(inputFile));
        csvSource.setFieldSeparator("\t");
        csvSource.setNominalAttributes("15-16");
        csvSource.setStringAttributes("3,4,6,8,10-12,14");
        csvSource.setNominalLabelSpecs(nominalVals);
        original = csvSource.getDataSet();
        unlabled = original;

        // First pass: drop the fixed set of attributes.
        Remove r = new Remove();
        r.setAttributeIndices("3-4,6,10-12,14");
        r.setInputFormat(unlabled);
        unlabled = Remove.useFilter(unlabled, r);

        // Second pass: drop one more attribute depending on the classifier type.
        // Indices refer to the dataset as it is after the first pass.
        r = new Remove();
        if (classifierType == E_P) {
            System.out.println("Filtering instances for E_P");
            r.setAttributeIndices("9");
            r.setInputFormat(unlabled);
            unlabled = Remove.useFilter(unlabled, r);
        } else if (classifierType == S_NS) {
            System.out.println("Filtering instances for S_NS");
            r.setAttributeIndices("8");
            r.setInputFormat(unlabled);
            unlabled = Remove.useFilter(unlabled, r);
        }

    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
    // If loading failed before any dataset was assigned, unlabled may still be
    // null; dereferencing it would hide the already-reported failure behind a
    // NullPointerException.
    if (unlabled != null) {
        int cIdx = unlabled.numAttributes() - 1;
        unlabled.setClassIndex(cIdx);
    }
}

From source file:sentinets.TrainModel.java

License:Open Source License

/**
 * Loads a tab-separated training file into {@code ins}, removes a fixed set of
 * attributes, prints a summary, and sets the last remaining attribute as the
 * class attribute.
 *
 * <p>Exceptions are printed and swallowed; when loading fails {@code ins} stays
 * {@code null} and no class index is set.
 *
 * @param inputFile path of the tab-separated training file
 */
public void setInstances(String inputFile) {
    // NOTE(review): labels are specified for attribute 42 while attributes 15-16
    // are the ones declared nominal below - confirm this is intended.
    String[] nominalVals = { "42:positive,negative,neutral" };
    ins = null;
    try {
        CSVLoader csvSource = new CSVLoader();
        csvSource.setSource(new File(inputFile));
        csvSource.setFieldSeparator("\t");
        csvSource.setNominalAttributes("15-16");
        csvSource.setStringAttributes("3,4,6,8,10-12,14");
        csvSource.setNominalLabelSpecs(nominalVals);
        ins = csvSource.getDataSet();

        Remove r = new Remove();
        r.setAttributeIndices("3-4,6,8,10-12,14,40-41");
        r.setInputFormat(ins);
        ins = Remove.useFilter(ins, r);
        System.out.println(ins.toSummaryString());

    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
    // ins is reset to null at the top and only assigned on a successful load, so
    // it must be checked before use; otherwise every load failure ends in a
    // NullPointerException here.
    if (ins != null) {
        int cIdx = ins.numAttributes() - 1;
        ins.setClassIndex(cIdx);
    }
}