Example usage for org.apache.lucene.analysis.standard StandardTokenizer setMaxTokenLength

List of usage examples for org.apache.lucene.analysis.standard StandardTokenizer setMaxTokenLength

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.standard StandardTokenizer setMaxTokenLength.

Prototype

public void setMaxTokenLength(int length) 

Source Link

Document

Set the max allowed token length.

Usage

From source file:de.jetwick.es.JetwickAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Tokenizer honoring the analyzer's configured limits.
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);
    tokenizer.setReplaceInvalidAcronym(replaceInvalidAcronym);

    // Filter chain: Jetwick word-delimiter handling followed by lowercasing.
    TokenStream chain = JetwickFilterFactory.myCreate(tokenizer, handleAsChar, handleAsDigit,
            generateWordParts, generateNumberParts, catenateWords, catenateNumbers, catenateAll,
            splitOnCaseChange, preserveOriginal, splitOnNumerics, stemEnglishPossessive,
            protectedWords);
    chain = new LowerCaseFilter(matchVersion, chain);

    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Re-apply the analyzer's current max token length when the stream is reused.
            tokenizer.setMaxTokenLength(JetwickAnalyzer.this.maxTokenLength);
            return super.reset(reader);
        }
    };
}

From source file:dk.defxws.fgslucene.PhaidraAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Tokenizer honoring the analyzer's configured token-length limit.
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);

    // Chain: standard cleanup, elision stripping (apostrophes, per unipd),
    // lowercasing, stop-word removal, then ASCII folding so accented terms
    // match their unaccented forms (per rasta).
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new ElisionFilter(matchVersion, chain, DEFAULT_ARTICLES);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, stopwords);
    chain = new ASCIIFoldingFilter(chain);

    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Pick up any change to maxTokenLength on reuse.
            tokenizer.setMaxTokenLength(PhaidraAnalyzer.this.maxTokenLength);
            return super.reset(reader);
        }
    };
}

From source file:edu.harvard.iq.dvn.core.index.DVNSearchAnalyzer.java

License: Apache License

public TokenStream tokenStream(String fieldName, Reader reader) {
    // NOTE: LUCENE_CURRENT may be the eventual target, but the Lucene source
    // carries a warning about using it — staying on LUCENE_29 for now.
    //    StandardTokenizer tokenStream = new StandardTokenizer(Version.LUCENE_CURRENT,reader);
    StandardTokenizer source = new StandardTokenizer(Version.LUCENE_29, reader);
    source.setMaxTokenLength(maxTokenLength);

    // Standard cleanup, lowercasing, then Porter stemming.
    TokenStream stream = new StandardFilter(source);
    stream = new LowerCaseFilter(stream);
    stream = new PorterStemFilter(stream);
    return stream;
}

From source file:edu.ur.lucene.analysis.StandardWithACIIFoldingFilter.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Tokenizer honoring the configured token-length limit and acronym handling.
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);
    tokenizer.setReplaceInvalidAcronym(replaceInvalidAcronym);

    // Chain: standard cleanup, lowercasing, stop-word removal, then ICU folding.
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, stopwords);
    chain = new ICUFoldingFilter(chain);

    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Pick up any change to maxTokenLength when the stream is reused.
            tokenizer.setMaxTokenLength(StandardWithACIIFoldingFilter.this.maxTokenLength);
            return super.reset(reader);
        }
    };
}

From source file:indexing.MyStandardAnalyzer.java

License: Apache License

@Override
public TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Tokenizer honoring the analyzer's configured token-length limit.
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);

    // Chain: standard cleanup, lowercasing, stop-word removal.
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, stopwords);

    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        public void setReader(final Reader reader) throws IOException {
            // Re-apply the current max token length before switching readers.
            tokenizer.setMaxTokenLength(MyStandardAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file:intelligentWebAlgorithms.algos.search.lucene.analyzer.CustomAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Tokenizer honoring the analyzer's configured token-length limit.
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);

    // Chain: standard cleanup, lowercasing, stop-word removal.
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, stopwords);

    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            // Re-apply the current max token length before switching readers.
            tokenizer.setMaxTokenLength(CustomAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file:ivory.tokenize.LuceneTokenizer.java

License: Apache License

private TokenStream tokenStream(Reader reader) {
    // Tokenize, capping token length at the configured maximum.
    StandardTokenizer source = new StandardTokenizer(reader, replaceInvalidAcronym);
    source.setMaxTokenLength(maxTokenLength);

    // Chain: standard cleanup, lowercasing, Terrier stop-word removal,
    // then Porter stemming.
    TokenStream stream = new StandardFilter(source);
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(stream, TERRIER_STOP_WORDS);
    stream = new PorterStemFilter(stream);
    return stream;
}

From source file:lucenejavafx.CustomAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Tokenizer honoring the analyzer's configured token-length limit.
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(maxTokenLength);
    //src.setReplaceInvalidAcronym(replaceInvalidAcronym);

    // Chain: standard cleanup, lowercasing, stop-word removal, Porter stemming.
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    tok = new PorterStemFilter(tok);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Re-apply the analyzer's current max token length on reuse.
            src.setMaxTokenLength(CustomAnalyzer.this.maxTokenLength);
            // BUG FIX: the original called reset(reader) on itself, recursing
            // infinitely (StackOverflowError). Delegate to the superclass instead.
            return super.reset(reader);
        }
    };
}

From source file:luceneprueba.CustomAnalyzers.ReviewAnalyzer.java

@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    // Tokenizer honoring the analyzer's configured token-length limit.
    final StandardTokenizer source = new StandardTokenizer();
    source.setMaxTokenLength(maxTokenLength);

    // Chain: standard cleanup, lowercasing, stop-word removal.
    TokenStream chain = new StandardFilter(source);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, stopwords);

    return new TokenStreamComponents(source, chain) {
        @Override
        protected void setReader(final Reader reader) {
            // Re-apply the current max token length before switching readers.
            source.setMaxTokenLength(ReviewAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file:mj.ocraptor.database.StandardAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Tokenizer honoring the analyzer's configured token-length limit.
    final StandardTokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    tokenizer.setMaxTokenLength(maxTokenLength);

    // Chain: standard cleanup, lowercasing, stop-word removal.
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, stopwords);

    return new TokenStreamComponents(tokenizer, chain) {
        @Override
        protected boolean reset(final Reader reader) throws IOException {
            // Wrap the incoming reader with the character-mapping filter before
            // resetting, and re-apply the analyzer's current max token length.
            Reader mapped = new MappingCharFilter(map, CharReader.get(reader));
            tokenizer.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
            return super.reset(mapped);
        }
    };
}