Example usage for org.apache.commons.csv CSVUtils parseLine

List of usage examples for org.apache.commons.csv CSVUtils parseLine

Introduction

On this page you can find example usage for org.apache.commons.csv CSVUtils parseLine.

Prototype

public static String[] parseLine(String s) throws IOException 

Document

Parses the first line only, according to the default CSVStrategy.
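
As a minimal sketch of a direct call (the class name below is illustrative, and it assumes the default CSVStrategy uses the comma as delimiter and the double quote as encapsulator):

import java.io.IOException;

import org.apache.commons.csv.CSVUtils;

public class ParseLineSketch {
    public static void main(String[] args) throws IOException {
        // Only the first line of the input is parsed; the second line is ignored.
        String[] fields = CSVUtils.parseLine("alpha,\"beta, with comma\",gamma\nignored,line");
        for (String field : fields) {
            System.out.println(field);
        }
    }
}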

Usage

From source file:com.idealista.solrmeter.view.component.TablePropertyPanel.java

@Override
protected void setSelectedValue(String value) {
    this.table.removeAll();

    String[] values;
    try {
        values = CSVUtils.parseLine(value);

        for (String val : values) {
            val = StringEscapeUtils.unescapeCsv(val);
            String[] pair = val.split("=");
            if (pair.length == 2) {
                this.table.setProperty(pair[0].trim(), pair[1].trim());
            } else if (pair.length == 1) {
                this.table.setProperty(pair[0].trim(), "");
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

}
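
Outside of the SolrMeter UI, the same key=value parsing can be sketched in isolation; the property names below are hypothetical, and Commons Lang's StringEscapeUtils.unescapeCsv is assumed to be on the classpath as in the example above:

import java.io.IOException;

import org.apache.commons.csv.CSVUtils;
import org.apache.commons.lang.StringEscapeUtils;

public class PropertyLineSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical comma-separated key=value pairs, as the panel above expects.
        String value = "facet=true,facet.field=category,rows=10";
        for (String token : CSVUtils.parseLine(value)) {
            String[] pair = StringEscapeUtils.unescapeCsv(token).split("=");
            String key = pair[0].trim();
            String val = pair.length == 2 ? pair[1].trim() : "";
            System.out.println(key + " -> " + val);
        }
    }
}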

From source file:chapter4.src.logistic.CsvRecordFactoryPredict.java

/**
 * Parse a single line of CSV-formatted text.
 *
 * Separated to make changing this functionality for the entire class easier
 * in the future.
 * @param line a line of CSV-formatted text
 * @return the parsed fields as a List<String>; if parsing throws an IOException,
 *         a singleton list containing the raw line
 */
private List<String> parseCsvLine(String line) {
    try {
        return Arrays.asList(CSVUtils.parseLine(line));
    } catch (IOException e) {
        List<String> list = Lists.newArrayList();
        list.add(line);
        return list;
    }
}

From source file:org.apache.mahout.classifier.mlp.RunMultilayerPerceptron.java

public static void main(String[] args) throws Exception {

    Parameters parameters = new Parameters();

    if (parseArgs(args, parameters)) {
        log.info("Load model from {}.", parameters.modelFilePathStr);
        MultilayerPerceptron mlp = new MultilayerPerceptron(parameters.modelFilePathStr);

        log.info("Topology of MLP: {}.", Arrays.toString(mlp.getLayerSizeList().toArray()));

        // validate the data
        log.info("Read the data...");
        Path inputFilePath = new Path(parameters.inputFilePathStr);
        FileSystem inputFS = inputFilePath.getFileSystem(new Configuration());
        if (!inputFS.exists(inputFilePath)) {
            log.error("Input file '{}' does not exists!", parameters.inputFilePathStr);
            mlp.close();
            return;
        }

        Path outputFilePath = new Path(parameters.outputFilePathStr);
        FileSystem outputFS = outputFilePath.getFileSystem(new Configuration());
        if (outputFS.exists(outputFilePath)) {
            log.error("Output file '{}' already exists!", parameters.outputFilePathStr);
            mlp.close();
            return;
        }

        if (!parameters.inputFileFormat.equals("csv")) {
            log.error("Currently only supports for csv format.");
            mlp.close();
            return; // currently only the csv format is supported
        }

        log.info("Read from column {} to column {}.", parameters.columnStart, parameters.columnEnd);

        BufferedWriter writer = null;
        BufferedReader reader = null;

        try {
            writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath)));
            reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath)));

            String line;

            if (parameters.skipHeader) {
                reader.readLine();
            }

            while ((line = reader.readLine()) != null) {
                String[] tokens = CSVUtils.parseLine(line);
                double[] features = new double[Math.min(parameters.columnEnd, tokens.length)
                        - parameters.columnStart + 1];

                for (int i = parameters.columnStart, j = 0; i < Math.min(parameters.columnEnd + 1,
                        tokens.length); ++i, ++j) {
                    features[j] = Double.parseDouble(tokens[i]);
                }
                Vector featureVec = new DenseVector(features);
                Vector res = mlp.getOutput(featureVec);
                int mostProbablyLabelIndex = res.maxValueIndex();
                writer.write(String.valueOf(mostProbablyLabelIndex));
            }
            mlp.close();
            log.info("Labeling finished.");
        } finally {
            Closeables.close(reader, true);
            Closeables.close(writer, true);
        }
    }
}

From source file:org.apache.mahout.classifier.mlp.TestNeuralNetwork.java

@Test
public void testWithCancerDataSet() throws IOException {

    File cancerDataset = getTestTempFile("cancer.csv");
    writeLines(cancerDataset, Datasets.CANCER);

    List<Vector> records = Lists.newArrayList();
    // Returns a mutable list of the data
    List<String> cancerDataSetList = Files.readLines(cancerDataset, Charsets.UTF_8);
    // Skip the header line, hence remove the first element in the list
    cancerDataSetList.remove(0);
    for (String line : cancerDataSetList) {
        String[] tokens = CSVUtils.parseLine(line);
        double[] values = new double[tokens.length];
        for (int i = 0; i < tokens.length; ++i) {
            values[i] = Double.parseDouble(tokens[i]);
        }
        records.add(new DenseVector(values));
    }

    int splitPoint = (int) (records.size() * 0.8);
    List<Vector> trainingSet = records.subList(0, splitPoint);
    List<Vector> testSet = records.subList(splitPoint, records.size());

    // initialize neural network model
    NeuralNetwork ann = new MultilayerPerceptron();
    int featureDimension = records.get(0).size() - 1;
    ann.addLayer(featureDimension, false, "Sigmoid");
    ann.addLayer(featureDimension * 2, false, "Sigmoid");
    ann.addLayer(1, true, "Sigmoid");
    ann.setLearningRate(0.05).setMomentumWeight(0.5).setRegularizationWeight(0.001);

    int iteration = 2000;
    for (int i = 0; i < iteration; ++i) {
        for (Vector trainingInstance : trainingSet) {
            ann.trainOnline(trainingInstance);
        }
    }

    int correctInstances = 0;
    for (Vector testInstance : testSet) {
        Vector res = ann.getOutput(testInstance.viewPart(0, testInstance.size() - 1));
        double actual = res.get(0);
        double expected = testInstance.get(testInstance.size() - 1);
        if (Math.abs(actual - expected) <= 0.1) {
            ++correctInstances;
        }
    }
    double accuracy = (double) correctInstances / testSet.size() * 100;
    assertTrue("The classifier is even worse than a random guesser!", accuracy > 50);
    System.out.printf("Cancer DataSet. Classification precision: %d/%d = %f%%\n", correctInstances,
            testSet.size(), accuracy);
}

From source file:org.apache.mahout.classifier.mlp.TestNeuralNetwork.java

@Test
public void testWithIrisDataSet() throws IOException {

    File irisDataset = getTestTempFile("iris.csv");
    writeLines(irisDataset, Datasets.IRIS);

    int numOfClasses = 3;
    List<Vector> records = Lists.newArrayList();
    // Returns a mutable list of the data
    List<String> irisDataSetList = Files.readLines(irisDataset, Charsets.UTF_8);
    // Skip the header line, hence remove the first element in the list
    irisDataSetList.remove(0);

    for (String line : irisDataSetList) {
        String[] tokens = CSVUtils.parseLine(line);
        // Last three dimensions represent the labels
        double[] values = new double[tokens.length + numOfClasses - 1];
        Arrays.fill(values, 0.0);
        for (int i = 0; i < tokens.length - 1; ++i) {
            values[i] = Double.parseDouble(tokens[i]);
        }
        // Add label values
        String label = tokens[tokens.length - 1];
        if (label.equalsIgnoreCase("setosa")) {
            values[values.length - 3] = 1;
        } else if (label.equalsIgnoreCase("versicolor")) {
            values[values.length - 2] = 1;
        } else { // label 'virginica'
            values[values.length - 1] = 1;
        }
        records.add(new DenseVector(values));
    }

    Collections.shuffle(records);

    int splitPoint = (int) (records.size() * 0.8);
    List<Vector> trainingSet = records.subList(0, splitPoint);
    List<Vector> testSet = records.subList(splitPoint, records.size());

    // Initialize neural network model
    NeuralNetwork ann = new MultilayerPerceptron();
    int featureDimension = records.get(0).size() - numOfClasses;
    ann.addLayer(featureDimension, false, "Sigmoid");
    ann.addLayer(featureDimension * 2, false, "Sigmoid");
    ann.addLayer(3, true, "Sigmoid"); // 3-class classification
    ann.setLearningRate(0.05).setMomentumWeight(0.4).setRegularizationWeight(0.005);

    int iteration = 2000;
    for (int i = 0; i < iteration; ++i) {
        for (Vector trainingInstance : trainingSet) {
            ann.trainOnline(trainingInstance);
        }
    }

    int correctInstances = 0;
    for (Vector testInstance : testSet) {
        Vector res = ann.getOutput(testInstance.viewPart(0, testInstance.size() - numOfClasses));
        double[] actualLabels = new double[numOfClasses];
        for (int i = 0; i < numOfClasses; ++i) {
            actualLabels[i] = res.get(i);
        }
        double[] expectedLabels = new double[numOfClasses];
        for (int i = 0; i < numOfClasses; ++i) {
            expectedLabels[i] = testInstance.get(testInstance.size() - numOfClasses + i);
        }

        boolean allCorrect = true;
        for (int i = 0; i < numOfClasses; ++i) {
            if (Math.abs(expectedLabels[i] - actualLabels[i]) >= 0.1) {
                allCorrect = false;
                break;
            }
        }
        if (allCorrect) {
            ++correctInstances;
        }
    }

    double accuracy = (double) correctInstances / testSet.size() * 100;
    assertTrue("The model is even worse than a random guesser.", accuracy > 50);

    System.out.printf("Iris DataSet. Classification precision: %d/%d = %f%%\n", correctInstances,
            testSet.size(), accuracy);
}