Example usage for org.apache.lucene.analysis.standard ClassicTokenizer ClassicTokenizer

List of usage examples for org.apache.lucene.analysis.standard ClassicTokenizer ClassicTokenizer

Introduction

On this page you can find example usage of the org.apache.lucene.analysis.standard ClassicTokenizer constructor.

Prototype

public ClassicTokenizer(AttributeFactory factory) 

Source Link

Document

Creates a new ClassicTokenizer with a given org.apache.lucene.util.AttributeFactory

Usage

From source file:ie.cmrc.smtx.lucene.analysis.LanguageBasedAnalyzer.java

License:Apache License

/**
 * Builds the analysis chain for the given field: classic tokenization followed by
 * standard filtering, lower-casing, language-specific stop-word removal, KStem
 * stemming, and finally ASCII folding.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Alternative tokenizers previously considered:
    //   new StandardTokenizer(Version.LUCENE_46, reader)
    //   new WhitespaceTokenizer(Version.LUCENE_46, reader)
    Tokenizer tokenizer = new ClassicTokenizer(reader);
    TokenStream chain = new StandardFilter(tokenizer);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, this.getStopWordsSet(language));
    chain = new KStemFilter(chain);
    chain = new ASCIIFoldingFilter(chain);
    return new TokenStreamComponents(tokenizer, chain);
}

From source file:org.meresco.lucene.analysis.MerescoStandardAnalyzer.java

License:Open Source License

/**
 * Tokenizes {@code string} with a {@code ClassicTokenizer}, runs the
 * field-specific post-analysis chain over it, and returns the resulting tokens.
 *
 * @param fieldName field whose post-analysis chain is applied
 * @param string    raw text to analyse
 * @return the analysed tokens, in stream order
 * @throws IOException if the token stream cannot be read
 */
public List<String> post_analyse(String fieldName, String string) throws IOException {
    ClassicTokenizer tokenizer = new ClassicTokenizer(new StringReader(string));
    return this.readTokenStream(this.post_analyzer(fieldName, tokenizer));
}

From source file:org.meresco.lucene.analysis.MerescoStandardAnalyzer.java

License:Open Source License

/**
 * Builds the pre-analysis chain: classic tokenization, then classic filtering,
 * ASCII folding, and lower-casing, in that order.
 */
protected Analyzer.TokenStreamComponents pre_analyzer(Reader reader) {
    final ClassicTokenizer tokenizer = new ClassicTokenizer(reader);
    // Wrap the tokenizer innermost-to-outermost: ClassicFilter first,
    // then ASCII folding, then lower-casing.
    TokenStream filtered = new LowerCaseFilter(new ASCIIFoldingFilter(new ClassicFilter(tokenizer)));
    return new Analyzer.TokenStreamComponents(tokenizer, filtered);
}