List of usage examples for org.apache.commons.csv CSVUtils parseLine
public static String[] parseLine(String s) throws IOException
From source file:com.idealista.solrmeter.view.component.TablePropertyPanel.java
@Override
protected void setSelectedValue(String value) {
    this.table.removeAll();
    try {
        // The stored value is a single CSV line of "key=value" pairs.
        String[] values = CSVUtils.parseLine(value);
        for (String val : values) {
            val = StringEscapeUtils.unescapeCsv(val);
            // Split on the FIRST '=' only (limit 2), so property values that
            // themselves contain '=' are kept instead of being silently dropped.
            String[] pair = val.split("=", 2);
            if (pair.length == 2) {
                this.table.setProperty(pair[0].trim(), pair[1].trim());
            } else if (pair.length == 1) {
                // Bare key with no '=': store it with an empty value.
                this.table.setProperty(pair[0].trim(), "");
            }
        }
    } catch (IOException e) {
        // TODO(review): route through the project's logger instead;
        // printStackTrace is invisible in a GUI application.
        e.printStackTrace();
    }
}
From source file:chapter4.src.logistic.CsvRecordFactoryPredict.java
/**
 * Parse a single line of CSV-formatted text.
 *
 * Kept as a separate method so the CSV-parsing strategy for the whole
 * class can be changed in one place in the future.
 *
 * @param line CSV formatted text
 * @return the parsed fields, or a single-element list holding the raw
 *         line when it cannot be parsed as CSV
 */
private List<String> parseCsvLine(String line) {
    String[] fields;
    try {
        fields = CSVUtils.parseLine(line);
    } catch (IOException parseFailure) {
        // Best-effort fallback: treat the whole line as one field.
        return Lists.newArrayList(line);
    }
    return Arrays.asList(fields);
}
From source file:org.apache.mahout.classifier.mlp.RunMultilayerPerceptron.java
public static void main(String[] args) throws Exception { Parameters parameters = new Parameters(); if (parseArgs(args, parameters)) { log.info("Load model from {}.", parameters.modelFilePathStr); MultilayerPerceptron mlp = new MultilayerPerceptron(parameters.modelFilePathStr); log.info("Topology of MLP: {}.", Arrays.toString(mlp.getLayerSizeList().toArray())); // validate the data log.info("Read the data..."); Path inputFilePath = new Path(parameters.inputFilePathStr); FileSystem inputFS = inputFilePath.getFileSystem(new Configuration()); if (!inputFS.exists(inputFilePath)) { log.error("Input file '{}' does not exists!", parameters.inputFilePathStr); mlp.close();// w ww . j a v a 2 s .co m return; } Path outputFilePath = new Path(parameters.outputFilePathStr); FileSystem outputFS = inputFilePath.getFileSystem(new Configuration()); if (outputFS.exists(outputFilePath)) { log.error("Output file '{}' already exists!", parameters.outputFilePathStr); mlp.close(); return; } if (!parameters.inputFileFormat.equals("csv")) { log.error("Currently only supports for csv format."); mlp.close(); return; // current only supports csv format } log.info("Read from column {} to column {}.", parameters.columnStart, parameters.columnEnd); BufferedWriter writer = null; BufferedReader reader = null; try { writer = new BufferedWriter(new OutputStreamWriter(outputFS.create(outputFilePath))); reader = new BufferedReader(new InputStreamReader(inputFS.open(inputFilePath))); String line; if (parameters.skipHeader) { reader.readLine(); } while ((line = reader.readLine()) != null) { String[] tokens = CSVUtils.parseLine(line); double[] features = new double[Math.min(parameters.columnEnd, tokens.length) - parameters.columnStart + 1]; for (int i = parameters.columnStart, j = 0; i < Math.min(parameters.columnEnd + 1, tokens.length); ++i, ++j) { features[j] = Double.parseDouble(tokens[i]); } Vector featureVec = new DenseVector(features); Vector res = mlp.getOutput(featureVec); int 
mostProbablyLabelIndex = res.maxValueIndex(); writer.write(String.valueOf(mostProbablyLabelIndex)); } mlp.close(); log.info("Labeling finished."); } finally { Closeables.close(reader, true); Closeables.close(writer, true); } } }
From source file:org.apache.mahout.classifier.mlp.TestNeuralNetwork.java
@Test public void testWithCancerDataSet() throws IOException { File cancerDataset = getTestTempFile("cancer.csv"); writeLines(cancerDataset, Datasets.CANCER); List<Vector> records = Lists.newArrayList(); // Returns a mutable list of the data List<String> cancerDataSetList = Files.readLines(cancerDataset, Charsets.UTF_8); // Skip the header line, hence remove the first element in the list cancerDataSetList.remove(0);// ww w .j ava 2s . c o m for (String line : cancerDataSetList) { String[] tokens = CSVUtils.parseLine(line); double[] values = new double[tokens.length]; for (int i = 0; i < tokens.length; ++i) { values[i] = Double.parseDouble(tokens[i]); } records.add(new DenseVector(values)); } int splitPoint = (int) (records.size() * 0.8); List<Vector> trainingSet = records.subList(0, splitPoint); List<Vector> testSet = records.subList(splitPoint, records.size()); // initialize neural network model NeuralNetwork ann = new MultilayerPerceptron(); int featureDimension = records.get(0).size() - 1; ann.addLayer(featureDimension, false, "Sigmoid"); ann.addLayer(featureDimension * 2, false, "Sigmoid"); ann.addLayer(1, true, "Sigmoid"); ann.setLearningRate(0.05).setMomentumWeight(0.5).setRegularizationWeight(0.001); int iteration = 2000; for (int i = 0; i < iteration; ++i) { for (Vector trainingInstance : trainingSet) { ann.trainOnline(trainingInstance); } } int correctInstances = 0; for (Vector testInstance : testSet) { Vector res = ann.getOutput(testInstance.viewPart(0, testInstance.size() - 1)); double actual = res.get(0); double expected = testInstance.get(testInstance.size() - 1); if (Math.abs(actual - expected) <= 0.1) { ++correctInstances; } } double accuracy = (double) correctInstances / testSet.size() * 100; assertTrue("The classifier is even worse than a random guesser!", accuracy > 50); System.out.printf("Cancer DataSet. Classification precision: %d/%d = %f%%\n", correctInstances, testSet.size(), accuracy); }
From source file:org.apache.mahout.classifier.mlp.TestNeuralNetwork.java
/**
 * Trains the MLP on the bundled Iris data set (3-class, one-hot encoded
 * labels) and asserts it classifies better than a random guesser.
 */
@Test
public void testWithIrisDataSet() throws IOException {
    File irisDataset = getTestTempFile("iris.csv");
    writeLines(irisDataset, Datasets.IRIS);
    int numOfClasses = 3;
    List<Vector> records = Lists.newArrayList();
    // Returns a mutable list of the data
    List<String> irisDataSetList = Files.readLines(irisDataset, Charsets.UTF_8);
    // Skip the header line, hence remove the first element in the list
    irisDataSetList.remove(0);
    for (String line : irisDataSetList) {
        String[] tokens = CSVUtils.parseLine(line);
        // Last three dimensions represent the labels:
        // (tokens.length - 1) feature columns plus numOfClasses one-hot slots.
        double[] values = new double[tokens.length + numOfClasses - 1];
        Arrays.fill(values, 0.0);
        for (int i = 0; i < tokens.length - 1; ++i) {
            values[i] = Double.parseDouble(tokens[i]);
        }
        // Add label values: one-hot encode the species name (last CSV column)
        // into the final three slots of the vector.
        String label = tokens[tokens.length - 1];
        if (label.equalsIgnoreCase("setosa")) {
            values[values.length - 3] = 1;
        } else if (label.equalsIgnoreCase("versicolor")) {
            values[values.length - 2] = 1;
        } else { // label 'virginica'
            values[values.length - 1] = 1;
        }
        records.add(new DenseVector(values));
    }
    // Randomize record order before the 80/20 split.
    Collections.shuffle(records);
    int splitPoint = (int) (records.size() * 0.8);
    List<Vector> trainingSet = records.subList(0, splitPoint);
    List<Vector> testSet = records.subList(splitPoint, records.size());
    // Initialize neural network model: one hidden layer twice as wide as
    // the input, three sigmoid outputs (one per class).
    NeuralNetwork ann = new MultilayerPerceptron();
    int featureDimension = records.get(0).size() - numOfClasses;
    ann.addLayer(featureDimension, false, "Sigmoid");
    ann.addLayer(featureDimension * 2, false, "Sigmoid");
    ann.addLayer(3, true, "Sigmoid"); // 3-class classification
    ann.setLearningRate(0.05).setMomentumWeight(0.4).setRegularizationWeight(0.005);
    int iteration = 2000;
    for (int i = 0; i < iteration; ++i) {
        for (Vector trainingInstance : trainingSet) {
            ann.trainOnline(trainingInstance);
        }
    }
    int correctInstances = 0;
    for (Vector testInstance : testSet) {
        // Feed only the feature part; the last numOfClasses entries are labels.
        Vector res = ann.getOutput(testInstance.viewPart(0, testInstance.size() - numOfClasses));
        double[] actualLabels = new double[numOfClasses];
        for (int i = 0; i < numOfClasses; ++i) {
            actualLabels[i] = res.get(i);
        }
        double[] expectedLabels = new double[numOfClasses];
        for (int i = 0; i < numOfClasses; ++i) {
            expectedLabels[i] = testInstance.get(testInstance.size() - numOfClasses + i);
        }
        // Correct only when EVERY output is within 0.1 of its one-hot target.
        boolean allCorrect = true;
        for (int i = 0; i < numOfClasses; ++i) {
            if (Math.abs(expectedLabels[i] - actualLabels[i]) >= 0.1) {
                allCorrect = false;
                break;
            }
        }
        if (allCorrect) {
            ++correctInstances;
        }
    }
    double accuracy = (double) correctInstances / testSet.size() * 100;
    assertTrue("The model is even worse than a random guesser.", accuracy > 50);
    System.out.printf("Iris DataSet. Classification precision: %d/%d = %f%%\n", correctInstances,
        testSet.size(), accuracy);
}