Example usage for weka.filters.unsupervised.attribute StringToWordVector setAttributeIndices

List of usage examples for weka.filters.unsupervised.attribute StringToWordVector setAttributeIndices

Introduction

On this page you can find example usages of the setAttributeIndices method of weka.filters.unsupervised.attribute.StringToWordVector.

Prototype

public void setAttributeIndices(String rangeList) 

Source Link

Document

Sets which attributes are to be worked on.

Usage

From source file:com.reactivetechnologies.analytics.lucene.InstanceTokenizer.java

License:Open Source License

/**
 * Converts String attributes into a set of attributes representing word occurrence information from the text contained in the strings. 
 * The set of words (attributes) is determined by the first batch filtered (typically training data). Uses a Lucene analyzer to tokenize
 * the string. NOTE: The text string should either be the first or last attribute
 * @param dataRaw/*from www .j a v a 2  s .c o  m*/
 * @param opts
 * @param isLast - whether last attribute is the text to be filtered, else first
 * @return
 * @throws Exception
 * @see {@linkplain StringToWordVector}
 */
public static Instances filter(Instances dataRaw, String opts, boolean isLast) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    if (StringUtils.hasText(opts)) {
        filter.setOptions(Utils.splitOptions(opts));
    }
    filter.setTokenizer(new InstanceTokenizer());
    filter.setUseStoplist(false);//ignore any other stop list
    filter.setStemmer(new NullStemmer());//ignore any other stemmer
    filter.setInputFormat(dataRaw);
    filter.setAttributeIndices(isLast ? "last" : "first");
    return Filter.useFilter(dataRaw, filter);
}

From source file:nl.uva.expose.classification.WekaClassification.java

/**
 * Runs a {@link StringToWordVector} filter over {@code dRaw} and returns the result.
 *
 * <p>The filter is configured to work on all attributes ("first-last"), with the IDF
 * transform enabled, lower-cased tokens, a minimum term frequency of 2, document-length
 * normalization set to {@code FILTER_NORMALIZE_ALL}, and word counts as output.
 *
 * <p>Java passes object references by value, so assigning the filtered data to the
 * {@code dFiltered} parameter would never be visible to the caller. The filtered
 * instances are therefore returned; callers must use the return value.
 *
 * @param dRaw the raw instances to filter
 * @param dFiltered kept for backward compatibility with existing call sites; it is
 *                  neither read nor modified
 * @return the instances returned by {@code Filter.useFilter} for {@code dRaw}
 * @throws Exception if configuring or applying the filter raises one
 */
private Instances getWordVector(Instances dRaw, Instances dFiltered) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    filter.setAttributeIndices("first-last");
    filter.setIDFTransform(true);
    filter.setLowerCaseTokens(true);
    filter.setMinTermFreq(2);
    filter.setNormalizeDocLength(
            new SelectedTag(StringToWordVector.FILTER_NORMALIZE_ALL, StringToWordVector.TAGS_FILTER));
    filter.setOutputWordCounts(true);
    // Input format is set last, after every option, right before the filter is applied.
    filter.setInputFormat(dRaw);
    return Filter.useFilter(dRaw, filter);
}