List of usage examples for org.apache.mahout.classifier.df.data.DescriptorUtils#parseDescriptor
public static Attribute[] parseDescriptor(CharSequence descriptor) throws DescriptorException
From source file: com.wsc.myexample.decisionForest.MyDataLoader.java
License:Apache License
/** * Generates the Dataset by parsing the entire data * //from www .java 2 s.com * @param descriptor * attributes description * @param regression * if true, the label is numerical * @param fs * file system * @param path * data path */ public static MyDataset generateDataset(CharSequence descriptor, boolean regression, String path) throws DescriptorException, IOException { Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor); Scanner scanner = new Scanner(new File(path)); // used to convert CATEGORICAL attribute to Integer @SuppressWarnings("unchecked") Set<String>[] valsets = new Set[attrs.length]; int size = 0; while (scanner.hasNextLine()) { String line = scanner.nextLine(); if (line.isEmpty()) { continue; } if (parseString(attrs, valsets, line, regression)) { size++; } } scanner.close(); @SuppressWarnings("unchecked") List<String>[] values = new List[attrs.length]; for (int i = 0; i < valsets.length; i++) { if (valsets[i] != null) { values[i] = Lists.newArrayList(valsets[i]); } } return new MyDataset(attrs, values, size, regression); }
From source file: com.wsc.myexample.decisionForest.MyDataLoader.java
License:Apache License
/** * Generates the Dataset by parsing the entire data * /*w w w . j av a 2s . c o m*/ * @param descriptor * attributes description */ public static MyDataset generateDataset(CharSequence descriptor, boolean regression, String[] data) throws DescriptorException { Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor); // used to convert CATEGORICAL attributes to Integer @SuppressWarnings("unchecked") Set<String>[] valsets = new Set[attrs.length]; int size = 0; for (String aData : data) { if (aData.isEmpty()) { continue; } if (parseString(attrs, valsets, aData, regression)) { size++; } } @SuppressWarnings("unchecked") List<String>[] values = new List[attrs.length]; for (int i = 0; i < valsets.length; i++) { if (valsets[i] != null) { values[i] = Lists.newArrayList(valsets[i]); } } return new MyDataset(attrs, values, size, regression); }