Example usage of the weka.core.stemmers.NullStemmer constructor NullStemmer()

List of usage examples for the weka.core.stemmers.NullStemmer constructor NullStemmer()

Introduction

On this page you can find example usages of the weka.core.stemmers.NullStemmer constructor NullStemmer().

Prototype

NullStemmer

Source Link

Usage

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Sets the stemming algorithm to use. Passing {@code null} means no stemming
 * at all: a {@link NullStemmer} is installed in its place.
 *
 * @param value
 *            the configured stemming algorithm, or {@code null} to fall
 *            back to a {@link NullStemmer}
 * @see NullStemmer
 */
public void setStemmer(Stemmer value) {
    // A null argument is replaced by a fresh NullStemmer.
    m_Stemmer = (value == null) ? new NullStemmer() : value;
}

From source file:com.reactivetechnologies.analytics.lucene.InstanceTokenizer.java

License:Open Source License

/**
 * Converts String attributes into a set of attributes representing word occurrence information from the text
 * contained in the strings. The set of words (attributes) is determined by the first batch filtered (typically
 * training data). Uses a Lucene analyzer to tokenize the string.
 * NOTE: The text string should either be the first or last attribute.
 *
 * @param dataRaw the instances to filter; also used as the filter's input format
 * @param opts option string that is split with {@code Utils.splitOptions} and handed to the filter; skipped
 *            when {@code StringUtils.hasText(opts)} is false. The tokenizer, stop list and stemmer settings
 *            are overridden afterwards regardless of these options.
 * @param isLast whether last attribute is the text to be filtered, else first
 * @return the instances returned by {@code Filter.useFilter} for {@code dataRaw}
 * @throws Exception propagated from option parsing, input-format setup or filter application
 * @see StringToWordVector
 */
public static Instances filter(Instances dataRaw, String opts, boolean isLast) throws Exception {
    StringToWordVector filter = new StringToWordVector();
    if (StringUtils.hasText(opts)) {
        filter.setOptions(Utils.splitOptions(opts));
    }
    filter.setTokenizer(new InstanceTokenizer());
    filter.setUseStoplist(false);//ignore any other stop list
    filter.setStemmer(new NullStemmer());//ignore any other stemmer
    filter.setAttributeIndices(isLast ? "last" : "first");
    // Weka requires setInputFormat to be the last call before the filter is applied,
    // so every configuration setter above must run first.
    filter.setInputFormat(dataRaw);
    return Filter.useFilter(dataRaw, filter);
}

From source file:com.reactivetechnologies.analytics.lucene.TextInstanceFilter.java

License:Open Source License

/**
 * Converts String attributes into a set of attributes representing word occurrence information from the text
 * contained in the strings. The set of words (attributes) is determined by the first batch filtered (typically
 * training data). Uses a Lucene analyzer to tokenize the string.
 * NOTE: The text string should either be the first or last attribute.
 *
 * @param dataRaw the instances to filter; also used as the filter's input format
 * @param opts option string that is split with {@code Utils.splitOptions} and handed to the filter; skipped
 *            when {@code StringUtils.hasText(opts)} is false. The tokenizer, stop list, stemmer, per-class
 *            and words-to-keep settings are overridden afterwards regardless of these options.
 * @param isLast whether last attribute is the text to be filtered, else first
 * @return the instances returned by {@code useFilter} for {@code dataRaw}
 * @throws Exception propagated from option parsing, input-format setup or filter application
 * @see StringToWordVector
 */
public static Instances filter(Instances dataRaw, String opts, boolean isLast) throws Exception {
    TextInstanceFilter filter = new TextInstanceFilter();
    if (StringUtils.hasText(opts)) {
        filter.setOptions(Utils.splitOptions(opts));
    }
    filter.setTokenizer(new InstanceTokenizer());
    filter.setUseStoplist(false);//ignore any other stop list
    filter.setStemmer(new NullStemmer());//ignore any other stemmer
    filter.setAttributeIndices(isLast ? "last" : "first");
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setWordsToKeep(10000);
    // Weka requires setInputFormat to be the last call before the filter is applied,
    // so every configuration setter above must run first.
    filter.setInputFormat(dataRaw);
    return useFilter(dataRaw, filter);
}