Example usage for org.apache.lucene.analysis.standard StandardTokenizer setMaxTokenLength

Introduction

On this page you can find example usages of org.apache.lucene.analysis.standard.StandardTokenizer#setMaxTokenLength.

Prototype

public void setMaxTokenLength(int length) 

Document

Set the max allowed token length.
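
To see the method in isolation, here is a minimal, self-contained sketch (assuming the Lucene 5+ API, in which StandardTokenizer has a no-argument constructor and receives its input through setReader; the class name MaxTokenLengthDemo and the limit of 8 are illustrative, not taken from any of the sources below):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MaxTokenLengthDemo {
    public static void main(String[] args) throws IOException {
        StandardTokenizer tokenizer = new StandardTokenizer();
        tokenizer.setMaxTokenLength(8); // cap token length at 8 chars (arbitrary demo value)
        tokenizer.setReader(new StringReader("short extraordinarily long words"));

        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(term.toString());
        }
        tokenizer.end();
        tokenizer.close();
    }
}

As the analyzer examples below show, the limit is typically re-applied inside the TokenStreamComponents setReader override so that it survives tokenizer reuse.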

Usage

From source file: mllab_lucene.StandardAnalyzerHtml.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
        @Override // re-apply the configured max token length each time the tokenizer gets a new reader
        protected void setReader(final Reader reader) throws IOException {
            src.setMaxTokenLength(StandardAnalyzerHtml.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file: nl.knaw.huygens.timbuctoo.lucene.accentanalyzer.MySearchAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final StandardTokenizer src = new StandardTokenizer();
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(src);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) {
            src.setMaxTokenLength(MySearchAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file: org.apache.blur.analysis.NoStopWordStandardAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            src.setMaxTokenLength(NoStopWordStandardAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file: org.apache.solr.analysis.StandardTokenizerFactory.java

License: Apache License

public StandardTokenizer create(Reader input) {
    StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, input);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
}

From source file: org.apache.vxquery.runtime.functions.index.CaseSensitiveAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer src;
    if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) {
        StandardTokenizer t = new StandardTokenizer();
        t.setMaxTokenLength(maxTokenLength);
        src = t;
    } else {
        StandardTokenizer40 t = new StandardTokenizer40();
        t.setMaxTokenLength(maxTokenLength);
        src = t;
    }
    TokenStream tok = new StandardFilter(src);
    tok = new StopFilter(tok, stopwords);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) {
            int m = CaseSensitiveAnalyzer.this.maxTokenLength;
            if (src instanceof StandardTokenizer) {
                ((StandardTokenizer) src).setMaxTokenLength(m);
            } else {
                ((StandardTokenizer40) src).setMaxTokenLength(m);
            }
            super.setReader(reader);
        }
    };
}

From source file: org.elasticsearch.index.analysis.NGram.NGramAnalyzer.java

License: Apache License

@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final StandardTokenizer source = new StandardTokenizer(Version.LUCENE_43, reader);
    source.setMaxTokenLength(maxTokenLength);
    TokenStream result = new NGramTokenFilter(source, 3, 10);
    return new Analyzer.TokenStreamComponents(source, result);
}

From source file: org.elasticsearch.index.analysis.StandardHtmlStripAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    if (!stopwords.isEmpty()) {
        tok = new StopFilter(matchVersion, tok, stopwords);
    }
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
            super.setReader(reader);
        }
    };
}

From source file: org.elasticsearch.index.analysis.StandardTokenizerFactory.java

License: Apache License

@Override
public Tokenizer create(Reader reader) {
    StandardTokenizer tokenizer = new StandardTokenizer(version, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
}

From source file: org.karsha.tokenize.DefaultTokenizer.java

License: Open Source License

public TokenStream tokenStream(Reader reader) {

    //StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);
    StandardTokenizer tokenStream = new StandardTokenizer(Version.LUCENE_35, reader);
    tokenStream.setMaxTokenLength(maxTokenLength);
    TokenStream result = new StandardFilter(tokenStream);

    result = new LowerCaseFilter(result);
    result = new StopFilter(Version.LUCENE_35, result, lu_stop_words);
    result = new StopFilter(Version.LUCENE_35, result, te_stop_words);
    //result = new PorterStemFilter(result);

    return result;
}

From source file: org.karsha.tokenize.SimpleTokenizer.java

License: Open Source License

public TokenStream tokenStream(Reader reader) {
    //StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);
    StandardTokenizer tokenStream = new StandardTokenizer(Version.LUCENE_35, reader);
    tokenStream.setMaxTokenLength(maxTokenLength);
    TokenStream result = new StandardFilter(tokenStream);

    result = new LowerCaseFilter(result);
    //result = new StopFilter(result, TERRIER_STOP_WORDS);
    //result = new PorterStemFilter(result);

    return result;
}