List of usage examples for the org.apache.lucene.analysis.standard.ClassicTokenizer constructor
public ClassicTokenizer(AttributeFactory factory)
From source file:ie.cmrc.smtx.lucene.analysis.LanguageBasedAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { //Tokenizer source = new StandardTokenizer(Version.LUCENE_46, reader); //Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_46, reader); Tokenizer source = new ClassicTokenizer(reader); TokenStream filter = new StandardFilter(source); filter = new LowerCaseFilter(filter); filter = new StopFilter(filter, this.getStopWordsSet(language)); filter = new KStemFilter(filter); filter = new ASCIIFoldingFilter(filter); return new TokenStreamComponents(source, filter); }
From source file:org.meresco.lucene.analysis.MerescoStandardAnalyzer.java
License:Open Source License
/**
 * Tokenizes {@code string} with a {@code ClassicTokenizer}, runs that tokenizer through
 * {@code post_analyzer} for the given field, and returns whatever {@code readTokenStream}
 * collects from the resulting stream.
 *
 * @param fieldName field name forwarded to {@code post_analyzer}
 * @param string    text to analyse
 * @return the list produced by {@code readTokenStream} for the analysed text
 * @throws IOException propagated from the underlying analysis calls
 */
public List<String> post_analyse(String fieldName, String string) throws IOException {
    final StringReader input = new StringReader(string);
    final ClassicTokenizer tokenizer = new ClassicTokenizer(input);
    return this.readTokenStream(this.post_analyzer(fieldName, tokenizer));
}
From source file:org.meresco.lucene.analysis.MerescoStandardAnalyzer.java
License:Open Source License
/**
 * Builds the pre-analysis chain over {@code reader}: a {@code ClassicTokenizer} wrapped by a
 * {@code ClassicFilter}, then an {@code ASCIIFoldingFilter}, then a {@code LowerCaseFilter}.
 *
 * @param reader character source handed to the tokenizer
 * @return components pairing the tokenizer with the outermost (lower-casing) filter
 */
protected Analyzer.TokenStreamComponents pre_analyzer(Reader reader) {
    final ClassicTokenizer tokenizer = new ClassicTokenizer(reader);
    // Innermost filter is applied first: classic -> ASCII folding -> lower-casing.
    final TokenStream chain =
            new LowerCaseFilter(new ASCIIFoldingFilter(new ClassicFilter(tokenizer)));
    return new Analyzer.TokenStreamComponents(tokenizer, chain);
}