Example usage for org.apache.lucene.analysis.standard ClassicFilter ClassicFilter

List of usage examples for org.apache.lucene.analysis.standard ClassicFilter ClassicFilter

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.standard ClassicFilter ClassicFilter.

Prototype

public ClassicFilter(TokenStream in) 

Source Link

Document

Constructs a ClassicFilter that filters the given TokenStream {@code in}.

Usage

From source file:lab_mri.CustomAnalyzer.java

@Override
protected TokenStreamComponents createComponents(String string, Reader reader) {
    // Default English stop words used by the StopFilter below (lower-cased set).
    CharArraySet stopWords = EnglishAnalyzer.getDefaultStopSet();
    Tokenizer tokenizer = new WikipediaTokenizer(reader);
    TokenStream filter = new ClassicFilter(tokenizer);
    filter = new StandardFilter(filter);
    // Fix: lower-case BEFORE stop-word removal and stemming. The default stop
    // set is lower-cased, and PorterStemFilter requires lower-cased input to
    // produce correct stems; the original chain stemmed and filtered stop words
    // on still-capitalized tokens.
    filter = new LowerCaseFilter(filter);
    filter = new StopFilter(filter, stopWords);
    filter = new PorterStemFilter(filter);
    return new TokenStreamComponents(tokenizer, filter);
}

From source file:org.apache.solr.analysis.ClassicFilterFactory.java

License:Apache License

/** Wraps the incoming stream in Lucene's ClassicFilter. */
public TokenFilter create(TokenStream input) {
    final ClassicFilter filtered = new ClassicFilter(input);
    return filtered;
}

From source file:org.elasticsearch.analysis.common.ClassicFilterFactory.java

License:Apache License

@Override
public TokenStream create(TokenStream tokenStream) {
    // Delegate token post-processing to Lucene's ClassicFilter.
    TokenStream filtered = new ClassicFilter(tokenStream);
    return filtered;
}

From source file:org.geotoolkit.lucene.analysis.standard.ClassicAnalyzer.java

License:Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final ClassicTokenizer source = new ClassicTokenizer();
    source.setMaxTokenLength(maxTokenLength);
    // Chain: classic filtering -> lower-casing -> stop-word removal.
    final TokenStream chain =
            new StopFilter(new LowerCaseFilter(new ClassicFilter(source)), stopwords);
    return new TokenStreamComponents(source, chain) {
        @Override
        protected void setReader(final Reader reader) {
            // Re-apply the analyzer's current max token length each time the
            // components are reused with a new reader.
            source.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file:org.meresco.lucene.analysis.MerescoStandardAnalyzer.java

License:Open Source License

protected Analyzer.TokenStreamComponents pre_analyzer(Reader reader) {
    // Tokenize, then normalize: classic filtering, ASCII folding, lower-casing.
    final ClassicTokenizer tokenizer = new ClassicTokenizer(reader);
    final TokenStream normalized =
            new LowerCaseFilter(new ASCIIFoldingFilter(new ClassicFilter(tokenizer)));
    return new Analyzer.TokenStreamComponents(tokenizer, normalized);
}

From source file:summarizer.KeywordsGuesser.java

License:Open Source License

/**
 * Extracts stemmed keywords from free text.
 *
 * @param input raw text to analyze
 * @return sorted list of keywords aggregated by stem
 * @throws IOException if the token stream fails
 */
public static List<Keyword> guessFromString(String input) throws IOException {
    // Protect hyphens with a "-0" sentinel so the punctuation strip keeps them,
    // then drop remaining punctuation and common English contractions.
    input = input.replaceAll("-+", "-0");
    input = input.replaceAll("[\\p{Punct}&&[^'-]]+", " ");
    input = input.replaceAll("(?:'(?:[tdsm]|[vr]e|ll))+\\b", "");

    TokenStream tokenStream = new ClassicTokenizer(LUCENE_VERSION, new StringReader(input));
    tokenStream = new LowerCaseFilter(LUCENE_VERSION, tokenStream);
    tokenStream = new ClassicFilter(tokenStream);
    tokenStream = new ASCIIFoldingFilter(tokenStream);
    tokenStream = new StopFilter(LUCENE_VERSION, tokenStream, EnglishAnalyzer.getDefaultStopSet());

    List<Keyword> keywords = new LinkedList<Keyword>();
    try {
        CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String term = token.toString();
            String stem = stemmize(term);
            if (stem != null) {
                // NOTE(review): find() presumably returns an existing keyword or
                // registers a new one in the list — confirm against its definition.
                Keyword keyword = find(keywords, new Keyword(stem.replaceAll("-0", "-")));
                keyword.add(term.replaceAll("-0", "-"));
            }
        }
        // Per the TokenStream contract, end() must be called after the last
        // incrementToken() and before close().
        tokenStream.end();
    } finally {
        // Fix: the stream was previously never closed (resource leak).
        tokenStream.close();
    }
    Collections.sort(keywords);
    return keywords;
}