Example usage for org.apache.mahout.classifier.df.data DescriptorUtils parseDescriptor

List of usage examples for org.apache.mahout.classifier.df.data DescriptorUtils parseDescriptor

Introduction

On this page you can find example usages of org.apache.mahout.classifier.df.data.DescriptorUtils.parseDescriptor.

Prototype

public static Attribute[] parseDescriptor(CharSequence descriptor) throws DescriptorException 

Source Link

Document

Parses a descriptor string and generates the corresponding array of Attributes

Usage

From source file:com.wsc.myexample.decisionForest.MyDataLoader.java

License:Apache License

/**
 * Generates the Dataset by parsing every line of a data file.
 *
 * <p>Empty lines are skipped. The dataset size is the number of remaining lines for which
 * {@code parseString} returns {@code true}.
 *
 * @param descriptor
 *          attributes description
 * @param regression
 *          if true, the label is numerical
 * @param path
 *          data path
 * @return a dataset built from the parsed attributes, the per-attribute value lists and the number
 *         of accepted lines
 * @throws DescriptorException
 *           if {@code DescriptorUtils.parseDescriptor} rejects the descriptor (possibly also raised
 *           while parsing a line; {@code parseString} is not visible here, so confirm)
 * @throws IOException
 *           if the file cannot be opened or an I/O error occurs while reading it
 */
public static MyDataset generateDataset(CharSequence descriptor, boolean regression, String path)
        throws DescriptorException, IOException {
    Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);

    // used to convert CATEGORICAL attribute to Integer
    // (presumably populated by parseString; entries it never sets stay null)
    @SuppressWarnings("unchecked")
    Set<String>[] valsets = new Set[attrs.length];

    int size = 0;
    Scanner scanner = new Scanner(new File(path));
    try {
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            if (line.isEmpty()) {
                continue;
            }

            if (parseString(attrs, valsets, line, regression)) {
                size++;
            }
        }

        // Scanner swallows read errors and just reports "no more lines"; surface them so a
        // failing read does not silently produce a truncated dataset.
        IOException readError = scanner.ioException();
        if (readError != null) {
            throw readError;
        }
    } finally {
        // release the file handle even when parsing a line throws
        scanner.close();
    }

    // Snapshot each collected value set as a list; attributes without a set keep a null entry.
    @SuppressWarnings("unchecked")
    List<String>[] values = new List[attrs.length];
    for (int i = 0; i < valsets.length; i++) {
        if (valsets[i] != null) {
            values[i] = Lists.newArrayList(valsets[i]);
        }
    }

    return new MyDataset(attrs, values, size, regression);
}

From source file:com.wsc.myexample.decisionForest.MyDataLoader.java

License:Apache License

/**
 * Generates the Dataset by parsing the entire data
 * /*w  w w . j av  a 2s . c o  m*/
 * @param descriptor
 *          attributes description
 */
public static MyDataset generateDataset(CharSequence descriptor, boolean regression, String[] data)
        throws DescriptorException {
    Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);

    // used to convert CATEGORICAL attributes to Integer
    @SuppressWarnings("unchecked")
    Set<String>[] valsets = new Set[attrs.length];

    int size = 0;
    for (String aData : data) {
        if (aData.isEmpty()) {
            continue;
        }

        if (parseString(attrs, valsets, aData, regression)) {
            size++;
        }
    }

    @SuppressWarnings("unchecked")
    List<String>[] values = new List[attrs.length];
    for (int i = 0; i < valsets.length; i++) {
        if (valsets[i] != null) {
            values[i] = Lists.newArrayList(valsets[i]);
        }
    }

    return new MyDataset(attrs, values, size, regression);
}