List of usage examples for weka.filters.unsupervised.attribute.StringToWordVector#setTFTransform
public void setTFTransform(boolean TFTransform)
From source file: org.montp2.m1decol.ter.gramms.filters.FilterTokenizerIDFT.java
License: Open Source License
public void indexingToTokenizer(String inPath, String outPath) throws Exception { WordTokenizer wordTokenizer = new WordTokenizer(); wordTokenizer.setDelimiters("\r \t.,;:'\"()?!"); Instances inputInstances = WekaUtils.loadARFF(inPath); StringToWordVector filter = new StringToWordVector(); filter.setInputFormat(inputInstances); filter.setIDFTransform(true);//w w w. j a v a 2 s. com filter.setTFTransform(true); filter.setDoNotOperateOnPerClassBasis(false); filter.setInvertSelection(false); filter.setLowerCaseTokens(true); filter.setMinTermFreq(3); filter.setOutputWordCounts(true); filter.setTokenizer(wordTokenizer); filter.setUseStoplist(true); filter.setWordsToKeep(200); Instances outputInstances = Filter.useFilter(inputInstances, filter); OutputStreamUtils.writeSimple(outputInstances.toString(), outPath); }